머신러닝 알고리즘 적용 _ 심화

- 데이터 : 사용자 데이터 + 상품 데이터(과거 상품 구매 이력)
- 신규 모델 : RandomForest, ExtraTrees, BaggingClassifier, (XGBoost)
- 업데이트된 데이터 + 기존 모델(DT, LR) 평가척도 
- 업데이트된 데이터 + 신규 모델 평가척도
- [+2] 피쳐 엔지니어링
- [+2] 매개변수 조정
- 캐글 제출 
- 머신러닝 파이프라인 흐름도 기록

In [1]:
import pandas as pd
import numpy as np
import pickle
import time
import operator
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import log_loss, f1_score, accuracy_score

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
# 신규 데이터 로딩

trn = pd.read_csv('../input/train_append_lb_lag.csv').fillna(0)
target = pd.DataFrame(pickle.load(open('../input/target.pkl','rb')), columns=['target'])
temp = pd.read_csv('../input/test_clean.csv')
test_id = temp['ncodpers']
tst = pd.read_csv('../input/test_append_lb_lag.csv').fillna(0)
print(trn.shape, target.shape, tst.shape)


(45619, 246) (45619, 1) (929615, 246)

In [3]:
trn.head()


Out[3]:
age antiguedad canal_entrada cod_prov conyuemp fecha_alta ind_actividad_cliente ind_empleado ind_nuevo indext ... indrel_lag_fiv indrel_1mes_lag_fiv indresi_lag_fiv nomprov_lag_fiv pais_residencia_lag_fiv renta_lag_fiv segmento_lag_fiv sexo_lag_fiv tiprel_1mes_lag_fiv ult_fec_cli_1t_lag_fiv
0 28 34 150 20 2 1012 1 3 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 28 34 150 20 2 1012 1 3 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 37 34 122 20 2 1012 1 3 0 0 ... 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
3 37 34 122 20 2 1012 1 3 0 0 ... 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
4 40 34 122 20 2 1012 1 3 0 0 ... 0.0 0.0 1.0 30.0 36.0 93847.0 1.0 0.0 0.0 -153.0

5 rows × 246 columns


In [4]:
trn.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45619 entries, 0 to 45618
Columns: 246 entries, age to ult_fec_cli_1t_lag_fiv
dtypes: float64(225), int64(21)
memory usage: 85.6 MB

In [5]:
# 신규 데이터 설명
for col in trn.columns:
    print(col)


age
antiguedad
canal_entrada
cod_prov
conyuemp
fecha_alta
ind_actividad_cliente
ind_empleado
ind_nuevo
indext
indfall
indrel
indrel_1mes
indresi
nomprov
pais_residencia
renta
segmento
sexo
tiprel_1mes
ult_fec_cli_1t
age_lag_one
antiguedad_lag_one
canal_entrada_lag_one
cod_prov_lag_one
conyuemp_lag_one
fecha_alta_lag_one
ind_actividad_cliente_lag_one
ind_ahor_fin_ult1_lag_one
ind_aval_fin_ult1_lag_one
ind_cco_fin_ult1_lag_one
ind_cder_fin_ult1_lag_one
ind_cno_fin_ult1_lag_one
ind_ctju_fin_ult1_lag_one
ind_ctma_fin_ult1_lag_one
ind_ctop_fin_ult1_lag_one
ind_ctpp_fin_ult1_lag_one
ind_deco_fin_ult1_lag_one
ind_dela_fin_ult1_lag_one
ind_deme_fin_ult1_lag_one
ind_ecue_fin_ult1_lag_one
ind_empleado_lag_one
ind_fond_fin_ult1_lag_one
ind_hip_fin_ult1_lag_one
ind_nom_pens_ult1_lag_one
ind_nomina_ult1_lag_one
ind_nuevo_lag_one
ind_plan_fin_ult1_lag_one
ind_pres_fin_ult1_lag_one
ind_reca_fin_ult1_lag_one
ind_recibo_ult1_lag_one
ind_tjcr_fin_ult1_lag_one
ind_valo_fin_ult1_lag_one
ind_viv_fin_ult1_lag_one
indext_lag_one
indfall_lag_one
indrel_lag_one
indrel_1mes_lag_one
indresi_lag_one
nomprov_lag_one
pais_residencia_lag_one
renta_lag_one
segmento_lag_one
sexo_lag_one
tiprel_1mes_lag_one
ult_fec_cli_1t_lag_one
age_lag_two
antiguedad_lag_two
canal_entrada_lag_two
cod_prov_lag_two
conyuemp_lag_two
fecha_alta_lag_two
ind_actividad_cliente_lag_two
ind_ahor_fin_ult1_lag_two
ind_aval_fin_ult1_lag_two
ind_cco_fin_ult1_lag_two
ind_cder_fin_ult1_lag_two
ind_cno_fin_ult1_lag_two
ind_ctju_fin_ult1_lag_two
ind_ctma_fin_ult1_lag_two
ind_ctop_fin_ult1_lag_two
ind_ctpp_fin_ult1_lag_two
ind_deco_fin_ult1_lag_two
ind_dela_fin_ult1_lag_two
ind_deme_fin_ult1_lag_two
ind_ecue_fin_ult1_lag_two
ind_empleado_lag_two
ind_fond_fin_ult1_lag_two
ind_hip_fin_ult1_lag_two
ind_nom_pens_ult1_lag_two
ind_nomina_ult1_lag_two
ind_nuevo_lag_two
ind_plan_fin_ult1_lag_two
ind_pres_fin_ult1_lag_two
ind_reca_fin_ult1_lag_two
ind_recibo_ult1_lag_two
ind_tjcr_fin_ult1_lag_two
ind_valo_fin_ult1_lag_two
ind_viv_fin_ult1_lag_two
indext_lag_two
indfall_lag_two
indrel_lag_two
indrel_1mes_lag_two
indresi_lag_two
nomprov_lag_two
pais_residencia_lag_two
renta_lag_two
segmento_lag_two
sexo_lag_two
tiprel_1mes_lag_two
ult_fec_cli_1t_lag_two
age_lag_thr
antiguedad_lag_thr
canal_entrada_lag_thr
cod_prov_lag_thr
conyuemp_lag_thr
fecha_alta_lag_thr
ind_actividad_cliente_lag_thr
ind_ahor_fin_ult1_lag_thr
ind_aval_fin_ult1_lag_thr
ind_cco_fin_ult1_lag_thr
ind_cder_fin_ult1_lag_thr
ind_cno_fin_ult1_lag_thr
ind_ctju_fin_ult1_lag_thr
ind_ctma_fin_ult1_lag_thr
ind_ctop_fin_ult1_lag_thr
ind_ctpp_fin_ult1_lag_thr
ind_deco_fin_ult1_lag_thr
ind_dela_fin_ult1_lag_thr
ind_deme_fin_ult1_lag_thr
ind_ecue_fin_ult1_lag_thr
ind_empleado_lag_thr
ind_fond_fin_ult1_lag_thr
ind_hip_fin_ult1_lag_thr
ind_nom_pens_ult1_lag_thr
ind_nomina_ult1_lag_thr
ind_nuevo_lag_thr
ind_plan_fin_ult1_lag_thr
ind_pres_fin_ult1_lag_thr
ind_reca_fin_ult1_lag_thr
ind_recibo_ult1_lag_thr
ind_tjcr_fin_ult1_lag_thr
ind_valo_fin_ult1_lag_thr
ind_viv_fin_ult1_lag_thr
indext_lag_thr
indfall_lag_thr
indrel_lag_thr
indrel_1mes_lag_thr
indresi_lag_thr
nomprov_lag_thr
pais_residencia_lag_thr
renta_lag_thr
segmento_lag_thr
sexo_lag_thr
tiprel_1mes_lag_thr
ult_fec_cli_1t_lag_thr
age_lag_fou
antiguedad_lag_fou
canal_entrada_lag_fou
cod_prov_lag_fou
conyuemp_lag_fou
fecha_alta_lag_fou
ind_actividad_cliente_lag_fou
ind_ahor_fin_ult1_lag_fou
ind_aval_fin_ult1_lag_fou
ind_cco_fin_ult1_lag_fou
ind_cder_fin_ult1_lag_fou
ind_cno_fin_ult1_lag_fou
ind_ctju_fin_ult1_lag_fou
ind_ctma_fin_ult1_lag_fou
ind_ctop_fin_ult1_lag_fou
ind_ctpp_fin_ult1_lag_fou
ind_deco_fin_ult1_lag_fou
ind_dela_fin_ult1_lag_fou
ind_deme_fin_ult1_lag_fou
ind_ecue_fin_ult1_lag_fou
ind_empleado_lag_fou
ind_fond_fin_ult1_lag_fou
ind_hip_fin_ult1_lag_fou
ind_nom_pens_ult1_lag_fou
ind_nomina_ult1_lag_fou
ind_nuevo_lag_fou
ind_plan_fin_ult1_lag_fou
ind_pres_fin_ult1_lag_fou
ind_reca_fin_ult1_lag_fou
ind_recibo_ult1_lag_fou
ind_tjcr_fin_ult1_lag_fou
ind_valo_fin_ult1_lag_fou
ind_viv_fin_ult1_lag_fou
indext_lag_fou
indfall_lag_fou
indrel_lag_fou
indrel_1mes_lag_fou
indresi_lag_fou
nomprov_lag_fou
pais_residencia_lag_fou
renta_lag_fou
segmento_lag_fou
sexo_lag_fou
tiprel_1mes_lag_fou
ult_fec_cli_1t_lag_fou
age_lag_fiv
antiguedad_lag_fiv
canal_entrada_lag_fiv
cod_prov_lag_fiv
conyuemp_lag_fiv
fecha_alta_lag_fiv
ind_actividad_cliente_lag_fiv
ind_ahor_fin_ult1_lag_fiv
ind_aval_fin_ult1_lag_fiv
ind_cco_fin_ult1_lag_fiv
ind_cder_fin_ult1_lag_fiv
ind_cno_fin_ult1_lag_fiv
ind_ctju_fin_ult1_lag_fiv
ind_ctma_fin_ult1_lag_fiv
ind_ctop_fin_ult1_lag_fiv
ind_ctpp_fin_ult1_lag_fiv
ind_deco_fin_ult1_lag_fiv
ind_dela_fin_ult1_lag_fiv
ind_deme_fin_ult1_lag_fiv
ind_ecue_fin_ult1_lag_fiv
ind_empleado_lag_fiv
ind_fond_fin_ult1_lag_fiv
ind_hip_fin_ult1_lag_fiv
ind_nom_pens_ult1_lag_fiv
ind_nomina_ult1_lag_fiv
ind_nuevo_lag_fiv
ind_plan_fin_ult1_lag_fiv
ind_pres_fin_ult1_lag_fiv
ind_reca_fin_ult1_lag_fiv
ind_recibo_ult1_lag_fiv
ind_tjcr_fin_ult1_lag_fiv
ind_valo_fin_ult1_lag_fiv
ind_viv_fin_ult1_lag_fiv
indext_lag_fiv
indfall_lag_fiv
indrel_lag_fiv
indrel_1mes_lag_fiv
indresi_lag_fiv
nomprov_lag_fiv
pais_residencia_lag_fiv
renta_lag_fiv
segmento_lag_fiv
sexo_lag_fiv
tiprel_1mes_lag_fiv
ult_fec_cli_1t_lag_fiv

In [6]:
# 훈련 데이터와 테스트 데이터 동일 여부 확인
trn.columns == tst.columns


Out[6]:
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True], dtype=bool)

In [7]:
# 빈도가 낮은 타겟은 사전에 제거 (이유: 교차 검증에 활용할 수 없음 + 너무 빈도가 낮아 무의미함)
rem_targets = [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, 21, 22, 23]  # 18 classes
trn = trn[target['target'].isin(rem_targets)]
target = target[target['target'].isin(rem_targets)]
target = LabelEncoder().fit_transform(target)

for t in np.unique(target):
    print(t, sum(target==t))


C:\Users\Byeon\Anaconda3\lib\site-packages\sklearn\preprocessing\label.py:129: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
0 9452
1 1934
2 55
3 349
4 222
5 154
6 503
7 33
8 1085
9 1219
10 246
11 21
12 2942
13 4733
14 159
15 5151
16 8218
17 9119

평가용 함수 정의


In [8]:
def evaluate(x, y, model):
    trn_scores = dict(); vld_scores = dict()
    sss = StratifiedShuffleSplit(n_splits=3, test_size=0.1, random_state=777)
    for t_ind, v_ind in sss.split(x,y):
        # split data
        x_trn, x_vld = x.iloc[t_ind], x.iloc[v_ind]
        y_trn, y_vld = y[t_ind], y[v_ind]

        # fit model
        model.fit(x_trn, y_trn)
        
        # eval _ trn        
        preds = model.predict_proba(x_trn)

        log_scores = trn_scores.get('log loss', [])
        log_scores.append(log_loss(y_trn, preds))
        trn_scores['log loss'] = log_scores

        # eval _ vld
        preds = model.predict_proba(x_vld)

        log_scores = vld_scores.get('log loss', [])
        log_scores.append(log_loss(y_vld, preds))
        vld_scores['log loss'] = log_scores
    return trn_scores, vld_scores

def print_scores(trn_scores, vld_scores):
    prefix = '        '
    cols = ['log loss']
    print('='*50)
    print('TRAIN EVAL')
    for col in cols:
        print('-'*50)
        print('# {}'.format(col))
        print('# {} Mean : {}'.format(prefix, np.mean(trn_scores[col])))
        print('# {} Raw  : {}'.format(prefix, trn_scores[col]))

    print('='*50)
    print('VALID EVAL')
    for col in cols:
        print('-'*50)
        print('# {}'.format(col))
        print('# {} Mean : {}'.format(prefix, np.mean(vld_scores[col])))
        print('# {} Raw  : {}'.format(prefix, vld_scores[col]))

def print_time(end, start):
    print('='*50)
    elapsed = end - start
    print('{} secs'.format(round(elapsed)))
    
def fit_and_eval(trn, target, model):
    trn_scores, vld_scores = evaluate(trn,target,model)
    print_scores(trn_scores, vld_scores)
    print_time(time.time(), st)

모델 학습 및 평가

- 모델 종류
    - Decision Tree : 트리 기반 모델
    - Logistic Regression : 선형 모델
    - RandomForest, ExtraTrees : 트리 기반 앙상블 모델
    - BaggingClassifier : 앙상블 모델
    - (XGBoost) : 트리 기반 앙상블 모델

- 훈련/검증 데이터 기반 평가 척도
    - Log Loss

- 검증 데이터 Log Loss 목표 수치 = 1.1

2회차 모델


In [9]:
st = time.time()
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(max_depth=5,random_state=777)
fit_and_eval(trn.fillna(0), target, dt_model)
# 9 sec


==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3109224350534252
#          Raw  : [1.3115981739687124, 1.3107068679354577, 1.310462263256106]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3779936666060042
#          Raw  : [1.3575320170809917, 1.3871952618582359, 1.3892537208787852]
==================================================
4 secs

3회차 모델


In [10]:
st = time.time()
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(max_depth=10, n_jobs=-1, random_state=777)
fit_and_eval(trn, target, rf_model)
# 5 sec


==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.201159088622055
#          Raw  : [1.1926486558865388, 1.2100953404685806, 1.2007332695110449]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3143710007821832
#          Raw  : [1.2947880460294008, 1.3242720506188492, 1.3240529056982995]
==================================================
3 secs

In [11]:
st = time.time()
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(max_depth=20, n_jobs=-1, random_state=777)
fit_and_eval(trn, target, rf_model)
# 5 sec


==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 0.6666717538543528
#          Raw  : [0.66829393750261501, 0.66145394035539729, 0.67026738370504657]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 2.166382771489643
#          Raw  : [2.1254981115532927, 2.1710336052809236, 2.2026165976347123]
==================================================
5 secs

In [12]:
st = time.time()
from sklearn.ensemble import ExtraTreesClassifier

et_model = ExtraTreesClassifier(max_depth=10, n_jobs=-1, random_state=777)
fit_and_eval(trn, target, et_model)
# 6 sec


==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.1864977396096574
#          Raw  : [1.1971863785361296, 1.1778108146519104, 1.1844960256409323]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2861678250955952
#          Raw  : [1.2981150084925899, 1.2791127823209658, 1.28127568447323]
==================================================
3 secs

In [13]:
st = time.time()
from sklearn.ensemble import BaggingClassifier

bg_model = BaggingClassifier(n_estimators=5, n_jobs=-1, random_state=777)
fit_and_eval(trn, target, bg_model)
# 75 sec


==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 0.48384125830774316
#          Raw  : [0.48381222376256705, 0.48285650980844452, 0.48485504135221785]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 9.577640107983079
#          Raw  : [9.6512237278370669, 9.3366553224998405, 9.745041273612328]
==================================================
38 secs

In [14]:
st = time.time()
from sklearn.ensemble import BaggingClassifier

bg_model = BaggingClassifier(n_estimators=10, n_jobs=-1, random_state=777)
fit_and_eval(trn, target, bg_model)
# 75 sec


==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 0.39795397341465205
#          Raw  : [0.39815431830011555, 0.3979483382233987, 0.39775926372044201]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 7.016678028708846
#          Raw  : [7.1605635920216182, 6.8222240819288231, 7.0672464121760976]
==================================================
49 secs

모델 매개변수 및 주요 변수 시각화 (1)

- Decision Tree, RandomForest, ExtraTrees 전용

In [15]:
# Utility

def observe_model_tree(trn, model):
    print('='*50)
    print(model)
    
    print('='*50)
    print('# Feature Importance')
    print(model.feature_importances_)
    
    print('-'*50)
    print('# Mapped to Column Name')
    prefix = '    '
    feature_importance = dict()
    for i, f_imp in enumerate(model.feature_importances_):
        print('{} {} \t {}'.format(prefix, round(f_imp,5), trn.columns[i]))
        feature_importance[trn.columns[i]] = f_imp

    print('-'*50)
    print('# Sorted Feature Importance')
    feature_importance_sorted = sorted(feature_importance.items(), key=operator.itemgetter(1), reverse=True)
    for item in feature_importance_sorted:
        print('{} {} \t {}'.format(prefix, round(item[1],5), item[0]))
    
    return feature_importance_sorted

def plot_fimp(fimp):
    x = []; y = []
    for item in fimp:
        x.append(item[0])
        y.append(item[1])

    f, ax = plt.subplots(figsize=(20, 15))
    sns.barplot(x,y,alpha=0.5)
    ax.set_title('Feature Importance for Model : Decision Tree')
    ax.set(xlabel='Column Name', ylabel='Feature Importance')

In [16]:
# 모델 상세 보기
dt_fimp = observe_model_tree(trn, dt_model)


==================================================
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=5,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=777, splitter='best')
==================================================
# Feature Importance
[  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   7.12470739e-03   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.82035690e-01   0.00000000e+00
   1.04508987e-02   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   1.52613436e-01   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.38309707e-01   1.99355530e-02
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   1.39887391e-03   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   2.86428234e-01   6.23968253e-02   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   4.21462327e-02
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   1.21178325e-02   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.88036323e-02   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   1.66885761e-02   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   1.36620898e-02   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   2.36816617e-04   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   3.56508942e-02   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00]
--------------------------------------------------
# Mapped to Column Name
     0.0 	 age
     0.0 	 antiguedad
     0.0 	 canal_entrada
     0.0 	 cod_prov
     0.0 	 conyuemp
     0.00712 	 fecha_alta
     0.0 	 ind_actividad_cliente
     0.0 	 ind_empleado
     0.0 	 ind_nuevo
     0.0 	 indext
     0.0 	 indfall
     0.0 	 indrel
     0.0 	 indrel_1mes
     0.0 	 indresi
     0.0 	 nomprov
     0.0 	 pais_residencia
     0.0 	 renta
     0.0 	 segmento
     0.0 	 sexo
     0.0 	 tiprel_1mes
     0.0 	 ult_fec_cli_1t
     0.0 	 age_lag_one
     0.0 	 antiguedad_lag_one
     0.0 	 canal_entrada_lag_one
     0.0 	 cod_prov_lag_one
     0.0 	 conyuemp_lag_one
     0.0 	 fecha_alta_lag_one
     0.0 	 ind_actividad_cliente_lag_one
     0.0 	 ind_ahor_fin_ult1_lag_one
     0.0 	 ind_aval_fin_ult1_lag_one
     0.18204 	 ind_cco_fin_ult1_lag_one
     0.0 	 ind_cder_fin_ult1_lag_one
     0.01045 	 ind_cno_fin_ult1_lag_one
     0.0 	 ind_ctju_fin_ult1_lag_one
     0.0 	 ind_ctma_fin_ult1_lag_one
     0.0 	 ind_ctop_fin_ult1_lag_one
     0.0 	 ind_ctpp_fin_ult1_lag_one
     0.0 	 ind_deco_fin_ult1_lag_one
     0.0 	 ind_dela_fin_ult1_lag_one
     0.0 	 ind_deme_fin_ult1_lag_one
     0.0 	 ind_ecue_fin_ult1_lag_one
     0.0 	 ind_empleado_lag_one
     0.0 	 ind_fond_fin_ult1_lag_one
     0.0 	 ind_hip_fin_ult1_lag_one
     0.15261 	 ind_nom_pens_ult1_lag_one
     0.0 	 ind_nomina_ult1_lag_one
     0.0 	 ind_nuevo_lag_one
     0.0 	 ind_plan_fin_ult1_lag_one
     0.0 	 ind_pres_fin_ult1_lag_one
     0.0 	 ind_reca_fin_ult1_lag_one
     0.13831 	 ind_recibo_ult1_lag_one
     0.01994 	 ind_tjcr_fin_ult1_lag_one
     0.0 	 ind_valo_fin_ult1_lag_one
     0.0 	 ind_viv_fin_ult1_lag_one
     0.0 	 indext_lag_one
     0.0 	 indfall_lag_one
     0.0 	 indrel_lag_one
     0.0 	 indrel_1mes_lag_one
     0.0 	 indresi_lag_one
     0.0 	 nomprov_lag_one
     0.0 	 pais_residencia_lag_one
     0.0 	 renta_lag_one
     0.0 	 segmento_lag_one
     0.0 	 sexo_lag_one
     0.0 	 tiprel_1mes_lag_one
     0.0 	 ult_fec_cli_1t_lag_one
     0.0 	 age_lag_two
     0.0 	 antiguedad_lag_two
     0.0 	 canal_entrada_lag_two
     0.0 	 cod_prov_lag_two
     0.0 	 conyuemp_lag_two
     0.0 	 fecha_alta_lag_two
     0.0 	 ind_actividad_cliente_lag_two
     0.0 	 ind_ahor_fin_ult1_lag_two
     0.0 	 ind_aval_fin_ult1_lag_two
     0.0 	 ind_cco_fin_ult1_lag_two
     0.0 	 ind_cder_fin_ult1_lag_two
     0.0014 	 ind_cno_fin_ult1_lag_two
     0.0 	 ind_ctju_fin_ult1_lag_two
     0.0 	 ind_ctma_fin_ult1_lag_two
     0.0 	 ind_ctop_fin_ult1_lag_two
     0.0 	 ind_ctpp_fin_ult1_lag_two
     0.0 	 ind_deco_fin_ult1_lag_two
     0.0 	 ind_dela_fin_ult1_lag_two
     0.0 	 ind_deme_fin_ult1_lag_two
     0.0 	 ind_ecue_fin_ult1_lag_two
     0.0 	 ind_empleado_lag_two
     0.0 	 ind_fond_fin_ult1_lag_two
     0.0 	 ind_hip_fin_ult1_lag_two
     0.28643 	 ind_nom_pens_ult1_lag_two
     0.0624 	 ind_nomina_ult1_lag_two
     0.0 	 ind_nuevo_lag_two
     0.0 	 ind_plan_fin_ult1_lag_two
     0.0 	 ind_pres_fin_ult1_lag_two
     0.0 	 ind_reca_fin_ult1_lag_two
     0.04215 	 ind_recibo_ult1_lag_two
     0.0 	 ind_tjcr_fin_ult1_lag_two
     0.0 	 ind_valo_fin_ult1_lag_two
     0.0 	 ind_viv_fin_ult1_lag_two
     0.0 	 indext_lag_two
     0.0 	 indfall_lag_two
     0.0 	 indrel_lag_two
     0.0 	 indrel_1mes_lag_two
     0.0 	 indresi_lag_two
     0.0 	 nomprov_lag_two
     0.0 	 pais_residencia_lag_two
     0.0 	 renta_lag_two
     0.0 	 segmento_lag_two
     0.0 	 sexo_lag_two
     0.0 	 tiprel_1mes_lag_two
     0.0 	 ult_fec_cli_1t_lag_two
     0.0 	 age_lag_thr
     0.0 	 antiguedad_lag_thr
     0.0 	 canal_entrada_lag_thr
     0.0 	 cod_prov_lag_thr
     0.0 	 conyuemp_lag_thr
     0.0 	 fecha_alta_lag_thr
     0.0 	 ind_actividad_cliente_lag_thr
     0.0 	 ind_ahor_fin_ult1_lag_thr
     0.0 	 ind_aval_fin_ult1_lag_thr
     0.0 	 ind_cco_fin_ult1_lag_thr
     0.0 	 ind_cder_fin_ult1_lag_thr
     0.0 	 ind_cno_fin_ult1_lag_thr
     0.0 	 ind_ctju_fin_ult1_lag_thr
     0.01212 	 ind_ctma_fin_ult1_lag_thr
     0.0 	 ind_ctop_fin_ult1_lag_thr
     0.0 	 ind_ctpp_fin_ult1_lag_thr
     0.0 	 ind_deco_fin_ult1_lag_thr
     0.0 	 ind_dela_fin_ult1_lag_thr
     0.0 	 ind_deme_fin_ult1_lag_thr
     0.0 	 ind_ecue_fin_ult1_lag_thr
     0.0 	 ind_empleado_lag_thr
     0.0 	 ind_fond_fin_ult1_lag_thr
     0.0 	 ind_hip_fin_ult1_lag_thr
     0.0188 	 ind_nom_pens_ult1_lag_thr
     0.0 	 ind_nomina_ult1_lag_thr
     0.0 	 ind_nuevo_lag_thr
     0.0 	 ind_plan_fin_ult1_lag_thr
     0.0 	 ind_pres_fin_ult1_lag_thr
     0.0 	 ind_reca_fin_ult1_lag_thr
     0.01669 	 ind_recibo_ult1_lag_thr
     0.0 	 ind_tjcr_fin_ult1_lag_thr
     0.0 	 ind_valo_fin_ult1_lag_thr
     0.0 	 ind_viv_fin_ult1_lag_thr
     0.0 	 indext_lag_thr
     0.0 	 indfall_lag_thr
     0.0 	 indrel_lag_thr
     0.0 	 indrel_1mes_lag_thr
     0.0 	 indresi_lag_thr
     0.0 	 nomprov_lag_thr
     0.0 	 pais_residencia_lag_thr
     0.0 	 renta_lag_thr
     0.0 	 segmento_lag_thr
     0.0 	 sexo_lag_thr
     0.0 	 tiprel_1mes_lag_thr
     0.0 	 ult_fec_cli_1t_lag_thr
     0.01366 	 age_lag_fou
     0.0 	 antiguedad_lag_fou
     0.0 	 canal_entrada_lag_fou
     0.0 	 cod_prov_lag_fou
     0.0 	 conyuemp_lag_fou
     0.0 	 fecha_alta_lag_fou
     0.0 	 ind_actividad_cliente_lag_fou
     0.0 	 ind_ahor_fin_ult1_lag_fou
     0.0 	 ind_aval_fin_ult1_lag_fou
     0.00024 	 ind_cco_fin_ult1_lag_fou
     0.0 	 ind_cder_fin_ult1_lag_fou
     0.0 	 ind_cno_fin_ult1_lag_fou
     0.0 	 ind_ctju_fin_ult1_lag_fou
     0.0 	 ind_ctma_fin_ult1_lag_fou
     0.0 	 ind_ctop_fin_ult1_lag_fou
     0.0 	 ind_ctpp_fin_ult1_lag_fou
     0.0 	 ind_deco_fin_ult1_lag_fou
     0.0 	 ind_dela_fin_ult1_lag_fou
     0.0 	 ind_deme_fin_ult1_lag_fou
     0.0 	 ind_ecue_fin_ult1_lag_fou
     0.0 	 ind_empleado_lag_fou
     0.0 	 ind_fond_fin_ult1_lag_fou
     0.0 	 ind_hip_fin_ult1_lag_fou
     0.0 	 ind_nom_pens_ult1_lag_fou
     0.0 	 ind_nomina_ult1_lag_fou
     0.0 	 ind_nuevo_lag_fou
     0.0 	 ind_plan_fin_ult1_lag_fou
     0.0 	 ind_pres_fin_ult1_lag_fou
     0.0 	 ind_reca_fin_ult1_lag_fou
     0.0 	 ind_recibo_ult1_lag_fou
     0.0 	 ind_tjcr_fin_ult1_lag_fou
     0.0 	 ind_valo_fin_ult1_lag_fou
     0.0 	 ind_viv_fin_ult1_lag_fou
     0.0 	 indext_lag_fou
     0.0 	 indfall_lag_fou
     0.0 	 indrel_lag_fou
     0.0 	 indrel_1mes_lag_fou
     0.0 	 indresi_lag_fou
     0.0 	 nomprov_lag_fou
     0.0 	 pais_residencia_lag_fou
     0.0 	 renta_lag_fou
     0.0 	 segmento_lag_fou
     0.0 	 sexo_lag_fou
     0.0 	 tiprel_1mes_lag_fou
     0.0 	 ult_fec_cli_1t_lag_fou
     0.0 	 age_lag_fiv
     0.0 	 antiguedad_lag_fiv
     0.0 	 canal_entrada_lag_fiv
     0.0 	 cod_prov_lag_fiv
     0.0 	 conyuemp_lag_fiv
     0.0 	 fecha_alta_lag_fiv
     0.0 	 ind_actividad_cliente_lag_fiv
     0.0 	 ind_ahor_fin_ult1_lag_fiv
     0.0 	 ind_aval_fin_ult1_lag_fiv
     0.0 	 ind_cco_fin_ult1_lag_fiv
     0.0 	 ind_cder_fin_ult1_lag_fiv
     0.0 	 ind_cno_fin_ult1_lag_fiv
     0.0 	 ind_ctju_fin_ult1_lag_fiv
     0.0 	 ind_ctma_fin_ult1_lag_fiv
     0.0 	 ind_ctop_fin_ult1_lag_fiv
     0.0 	 ind_ctpp_fin_ult1_lag_fiv
     0.0 	 ind_deco_fin_ult1_lag_fiv
     0.0 	 ind_dela_fin_ult1_lag_fiv
     0.0 	 ind_deme_fin_ult1_lag_fiv
     0.0 	 ind_ecue_fin_ult1_lag_fiv
     0.0 	 ind_empleado_lag_fiv
     0.0 	 ind_fond_fin_ult1_lag_fiv
     0.0 	 ind_hip_fin_ult1_lag_fiv
     0.03565 	 ind_nom_pens_ult1_lag_fiv
     0.0 	 ind_nomina_ult1_lag_fiv
     0.0 	 ind_nuevo_lag_fiv
     0.0 	 ind_plan_fin_ult1_lag_fiv
     0.0 	 ind_pres_fin_ult1_lag_fiv
     0.0 	 ind_reca_fin_ult1_lag_fiv
     0.0 	 ind_recibo_ult1_lag_fiv
     0.0 	 ind_tjcr_fin_ult1_lag_fiv
     0.0 	 ind_valo_fin_ult1_lag_fiv
     0.0 	 ind_viv_fin_ult1_lag_fiv
     0.0 	 indext_lag_fiv
     0.0 	 indfall_lag_fiv
     0.0 	 indrel_lag_fiv
     0.0 	 indrel_1mes_lag_fiv
     0.0 	 indresi_lag_fiv
     0.0 	 nomprov_lag_fiv
     0.0 	 pais_residencia_lag_fiv
     0.0 	 renta_lag_fiv
     0.0 	 segmento_lag_fiv
     0.0 	 sexo_lag_fiv
     0.0 	 tiprel_1mes_lag_fiv
     0.0 	 ult_fec_cli_1t_lag_fiv
--------------------------------------------------
# Sorted Feature Importance
     0.28643 	 ind_nom_pens_ult1_lag_two
     0.18204 	 ind_cco_fin_ult1_lag_one
     0.15261 	 ind_nom_pens_ult1_lag_one
     0.13831 	 ind_recibo_ult1_lag_one
     0.0624 	 ind_nomina_ult1_lag_two
     0.04215 	 ind_recibo_ult1_lag_two
     0.03565 	 ind_nom_pens_ult1_lag_fiv
     0.01994 	 ind_tjcr_fin_ult1_lag_one
     0.0188 	 ind_nom_pens_ult1_lag_thr
     0.01669 	 ind_recibo_ult1_lag_thr
     0.01366 	 age_lag_fou
     0.01212 	 ind_ctma_fin_ult1_lag_thr
     0.01045 	 ind_cno_fin_ult1_lag_one
     0.00712 	 fecha_alta
     0.0014 	 ind_cno_fin_ult1_lag_two
     0.00024 	 ind_cco_fin_ult1_lag_fou
     0.0 	 ult_fec_cli_1t_lag_one
     0.0 	 ind_dela_fin_ult1_lag_thr
     0.0 	 ind_cder_fin_ult1_lag_two
     0.0 	 sexo
     0.0 	 renta_lag_two
     0.0 	 nomprov_lag_fou
     0.0 	 ult_fec_cli_1t_lag_fiv
     0.0 	 ind_cder_fin_ult1_lag_fou
     0.0 	 ind_aval_fin_ult1_lag_fou
     0.0 	 indext_lag_one
     0.0 	 ind_tjcr_fin_ult1_lag_two
     0.0 	 cod_prov
     0.0 	 ind_valo_fin_ult1_lag_fou
     0.0 	 pais_residencia_lag_fou
     0.0 	 ind_ahor_fin_ult1_lag_thr
     0.0 	 ind_ctpp_fin_ult1_lag_fiv
     0.0 	 renta
     0.0 	 ind_tjcr_fin_ult1_lag_fiv
     0.0 	 indext_lag_fiv
     0.0 	 nomprov
     0.0 	 tiprel_1mes_lag_fiv
     0.0 	 ind_cno_fin_ult1_lag_thr
     0.0 	 ind_cco_fin_ult1_lag_fiv
     0.0 	 ind_ahor_fin_ult1_lag_fiv
     0.0 	 ind_viv_fin_ult1_lag_one
     0.0 	 fecha_alta_lag_fou
     0.0 	 ind_recibo_ult1_lag_fou
     0.0 	 ind_hip_fin_ult1_lag_one
     0.0 	 ind_ctop_fin_ult1_lag_two
     0.0 	 ind_empleado_lag_one
     0.0 	 ult_fec_cli_1t_lag_thr
     0.0 	 conyuemp_lag_two
     0.0 	 ind_cno_fin_ult1_lag_fiv
     0.0 	 sexo_lag_fiv
     0.0 	 cod_prov_lag_two
     0.0 	 ind_reca_fin_ult1_lag_fou
     0.0 	 ind_valo_fin_ult1_lag_two
     0.0 	 ind_cder_fin_ult1_lag_one
     0.0 	 indrel_lag_fiv
     0.0 	 ind_ctpp_fin_ult1_lag_thr
     0.0 	 indrel_1mes_lag_two
     0.0 	 ind_reca_fin_ult1_lag_fiv
     0.0 	 ind_pres_fin_ult1_lag_fou
     0.0 	 ind_empleado_lag_fou
     0.0 	 ind_ctma_fin_ult1_lag_fiv
     0.0 	 ind_actividad_cliente
     0.0 	 cod_prov_lag_one
     0.0 	 canal_entrada_lag_thr
     0.0 	 fecha_alta_lag_thr
     0.0 	 ind_nomina_ult1_lag_thr
     0.0 	 ind_viv_fin_ult1_lag_thr
     0.0 	 ind_valo_fin_ult1_lag_one
     0.0 	 ind_fond_fin_ult1_lag_one
     0.0 	 ind_pres_fin_ult1_lag_thr
     0.0 	 ind_valo_fin_ult1_lag_fiv
     0.0 	 pais_residencia_lag_one
     0.0 	 ind_cco_fin_ult1_lag_thr
     0.0 	 cod_prov_lag_fou
     0.0 	 canal_entrada_lag_fou
     0.0 	 segmento_lag_one
     0.0 	 pais_residencia_lag_fiv
     0.0 	 ind_dela_fin_ult1_lag_one
     0.0 	 indext
     0.0 	 ind_hip_fin_ult1_lag_two
     0.0 	 renta_lag_one
     0.0 	 indrel_1mes_lag_one
     0.0 	 ind_pres_fin_ult1_lag_fiv
     0.0 	 ind_empleado
     0.0 	 antiguedad
     0.0 	 nomprov_lag_two
     0.0 	 segmento_lag_two
     0.0 	 ind_deco_fin_ult1_lag_one
     0.0 	 pais_residencia
     0.0 	 indext_lag_thr
     0.0 	 indfall
     0.0 	 ind_actividad_cliente_lag_thr
     0.0 	 indfall_lag_thr
     0.0 	 ind_ecue_fin_ult1_lag_fou
     0.0 	 indresi_lag_one
     0.0 	 ind_empleado_lag_thr
     0.0 	 segmento
     0.0 	 fecha_alta_lag_one
     0.0 	 ind_ahor_fin_ult1_lag_fou
     0.0 	 antiguedad_lag_thr
     0.0 	 ind_aval_fin_ult1_lag_thr
     0.0 	 ind_cco_fin_ult1_lag_two
     0.0 	 ind_empleado_lag_fiv
     0.0 	 sexo_lag_thr
     0.0 	 nomprov_lag_thr
     0.0 	 conyuemp_lag_fiv
     0.0 	 fecha_alta_lag_fiv
     0.0 	 ind_pres_fin_ult1_lag_one
     0.0 	 ind_ecue_fin_ult1_lag_two
     0.0 	 ind_reca_fin_ult1_lag_two
     0.0 	 ind_deme_fin_ult1_lag_thr
     0.0 	 conyuemp
     0.0 	 ind_reca_fin_ult1_lag_one
     0.0 	 ult_fec_cli_1t
     0.0 	 ind_ctpp_fin_ult1_lag_two
     0.0 	 ind_viv_fin_ult1_lag_fiv
     0.0 	 ind_dela_fin_ult1_lag_two
     0.0 	 antiguedad_lag_fou
     0.0 	 ind_nuevo_lag_fou
     0.0 	 ind_ctju_fin_ult1_lag_fiv
     0.0 	 ind_valo_fin_ult1_lag_thr
     0.0 	 indext_lag_two
     0.0 	 indresi_lag_two
     0.0 	 ind_hip_fin_ult1_lag_thr
     0.0 	 ind_nuevo_lag_two
     0.0 	 ind_ctop_fin_ult1_lag_one
     0.0 	 indfall_lag_fiv
     0.0 	 ind_deme_fin_ult1_lag_fou
     0.0 	 ind_aval_fin_ult1_lag_one
     0.0 	 ind_reca_fin_ult1_lag_thr
     0.0 	 ult_fec_cli_1t_lag_two
     0.0 	 age_lag_two
     0.0 	 ind_ctpp_fin_ult1_lag_one
     0.0 	 indrel_lag_fou
     0.0 	 ind_tjcr_fin_ult1_lag_thr
     0.0 	 ind_nuevo_lag_one
     0.0 	 ind_ctju_fin_ult1_lag_two
     0.0 	 indresi_lag_fiv
     0.0 	 tiprel_1mes_lag_fou
     0.0 	 ult_fec_cli_1t_lag_fou
     0.0 	 indfall_lag_one
     0.0 	 renta_lag_fou
     0.0 	 indext_lag_fou
     0.0 	 ind_ctma_fin_ult1_lag_two
     0.0 	 conyuemp_lag_fou
     0.0 	 ind_ctpp_fin_ult1_lag_fou
     0.0 	 ind_nom_pens_ult1_lag_fou
     0.0 	 ind_deme_fin_ult1_lag_fiv
     0.0 	 canal_entrada_lag_one
     0.0 	 ind_hip_fin_ult1_lag_fiv
     0.0 	 ind_ctma_fin_ult1_lag_one
     0.0 	 ind_deme_fin_ult1_lag_two
     0.0 	 indrel
     0.0 	 ind_ahor_fin_ult1_lag_one
     0.0 	 ind_fond_fin_ult1_lag_fou
     0.0 	 ind_recibo_ult1_lag_fiv
     0.0 	 ind_deco_fin_ult1_lag_fou
     0.0 	 nomprov_lag_one
     0.0 	 ind_fond_fin_ult1_lag_two
     0.0 	 tiprel_1mes_lag_one
     0.0 	 ind_nuevo
     0.0 	 indrel_1mes_lag_thr
     0.0 	 ind_deco_fin_ult1_lag_thr
     0.0 	 canal_entrada_lag_fiv
     0.0 	 ind_ctju_fin_ult1_lag_fou
     0.0 	 conyuemp_lag_thr
     0.0 	 ind_deco_fin_ult1_lag_two
     0.0 	 fecha_alta_lag_two
     0.0 	 canal_entrada_lag_two
     0.0 	 ind_viv_fin_ult1_lag_fou
     0.0 	 ind_aval_fin_ult1_lag_fiv
     0.0 	 ind_deme_fin_ult1_lag_one
     0.0 	 sexo_lag_one
     0.0 	 cod_prov_lag_fiv
     0.0 	 age_lag_one
     0.0 	 ind_aval_fin_ult1_lag_two
     0.0 	 ind_plan_fin_ult1_lag_thr
     0.0 	 ind_ctop_fin_ult1_lag_fou
     0.0 	 ind_nuevo_lag_thr
     0.0 	 tiprel_1mes_lag_two
     0.0 	 antiguedad_lag_one
     0.0 	 indresi_lag_fou
     0.0 	 cod_prov_lag_thr
     0.0 	 ind_empleado_lag_two
     0.0 	 indresi_lag_thr
     0.0 	 conyuemp_lag_one
     0.0 	 ind_plan_fin_ult1_lag_fou
     0.0 	 ind_ctju_fin_ult1_lag_thr
     0.0 	 indrel_lag_one
     0.0 	 ind_cno_fin_ult1_lag_fou
     0.0 	 ind_dela_fin_ult1_lag_fiv
     0.0 	 ind_actividad_cliente_lag_two
     0.0 	 ind_ecue_fin_ult1_lag_thr
     0.0 	 ind_plan_fin_ult1_lag_one
     0.0 	 indrel_lag_thr
     0.0 	 age
     0.0 	 ind_tjcr_fin_ult1_lag_fou
     0.0 	 ind_ctop_fin_ult1_lag_fiv
     0.0 	 segmento_lag_fou
     0.0 	 segmento_lag_fiv
     0.0 	 ind_nomina_ult1_lag_one
     0.0 	 canal_entrada
     0.0 	 age_lag_fiv
     0.0 	 ind_deco_fin_ult1_lag_fiv
     0.0 	 indrel_lag_two
     0.0 	 segmento_lag_thr
     0.0 	 ind_ecue_fin_ult1_lag_one
     0.0 	 indrel_1mes_lag_fiv
     0.0 	 renta_lag_fiv
     0.0 	 age_lag_thr
     0.0 	 renta_lag_thr
     0.0 	 ind_dela_fin_ult1_lag_fou
     0.0 	 indfall_lag_two
     0.0 	 ind_plan_fin_ult1_lag_fiv
     0.0 	 ind_nuevo_lag_fiv
     0.0 	 indresi
     0.0 	 sexo_lag_fou
     0.0 	 antiguedad_lag_two
     0.0 	 ind_fond_fin_ult1_lag_thr
     0.0 	 ind_nomina_ult1_lag_fiv
     0.0 	 ind_plan_fin_ult1_lag_two
     0.0 	 ind_viv_fin_ult1_lag_two
     0.0 	 ind_pres_fin_ult1_lag_two
     0.0 	 antiguedad_lag_fiv
     0.0 	 ind_ahor_fin_ult1_lag_two
     0.0 	 ind_ctop_fin_ult1_lag_thr
     0.0 	 ind_actividad_cliente_lag_one
     0.0 	 ind_actividad_cliente_lag_fou
     0.0 	 tiprel_1mes_lag_thr
     0.0 	 ind_ecue_fin_ult1_lag_fiv
     0.0 	 ind_ctju_fin_ult1_lag_one
     0.0 	 pais_residencia_lag_thr
     0.0 	 indrel_1mes_lag_fou
     0.0 	 indrel_1mes
     0.0 	 ind_ctma_fin_ult1_lag_fou
     0.0 	 ind_actividad_cliente_lag_fiv
     0.0 	 ind_hip_fin_ult1_lag_fou
     0.0 	 ind_cder_fin_ult1_lag_fiv
     0.0 	 indfall_lag_fou
     0.0 	 sexo_lag_two
     0.0 	 tiprel_1mes
     0.0 	 ind_fond_fin_ult1_lag_fiv
     0.0 	 ind_cder_fin_ult1_lag_thr
     0.0 	 nomprov_lag_fiv
     0.0 	 pais_residencia_lag_two
     0.0 	 ind_nomina_ult1_lag_fou

In [22]:
# 주요 변수 시각화
plot_fimp(dt_fimp)



In [23]:
# 모델 상세 보기
rf_fimp = observe_model_tree(trn, rf_model)


==================================================
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=10, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=-1, oob_score=False, random_state=777,
            verbose=0, warm_start=False)
==================================================
# Feature Importance
[  2.71608365e-02   9.53519959e-03   3.88301303e-03   2.28874070e-03
   0.00000000e+00   6.57678052e-03   8.74417537e-04   3.06648148e-05
   1.21417512e-03   4.39033270e-04   1.36754165e-05   3.07072666e-05
   1.06782248e-06   9.47888981e-06   2.75825089e-03   2.86905252e-05
   3.08559961e-03   7.60711602e-03   9.29999158e-04   1.04699917e-03
   4.22859953e-05   9.29198640e-03   3.10912939e-03   3.19439309e-02
   1.36434185e-03   0.00000000e+00   1.19456972e-02   1.44681491e-02
   0.00000000e+00   0.00000000e+00   5.94572239e-02   3.34672687e-05
   1.16552066e-02   1.30182812e-03   3.53770111e-03   1.44773235e-04
   1.87499580e-04   7.61612694e-04   3.28405120e-04   5.86880926e-05
   4.80893435e-04   0.00000000e+00   4.57316081e-05   4.59753008e-05
   5.19686834e-02   5.14788278e-02   1.00234827e-04   5.06094069e-05
   0.00000000e+00   2.46161002e-03   5.08717765e-02   1.59576146e-02
   2.25858191e-04   5.52105315e-05   1.82265651e-04   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.06557621e-02   9.95418434e-03
   1.06572242e-04   4.27317044e-03   2.16729963e-03   4.71156904e-04
   1.37479288e-03   6.41873011e-04   2.24190709e-02   1.39459585e-03
   2.38214581e-03   1.26856976e-03   2.16237331e-03   5.70343006e-03
   8.91670361e-05   0.00000000e+00   0.00000000e+00   1.97924332e-02
   0.00000000e+00   2.85169982e-03   9.81940802e-04   5.23995621e-03
   1.53393544e-04   9.19712728e-05   4.75149238e-04   9.27879132e-04
   1.45631279e-05   7.29965834e-04   2.78690580e-04   9.50165545e-05
   8.23717646e-05   5.33856796e-02   1.01940701e-02   9.91356289e-05
   3.35222425e-05   1.82171352e-05   1.53510788e-03   6.67784464e-03
   3.46042124e-03   1.62803234e-04   2.94033115e-05   8.29261694e-05
   0.00000000e+00   0.00000000e+00   5.01200138e-06   8.68038744e-06
   7.13777981e-04   1.65975189e-02   1.96131366e-03   1.25792226e-03
   1.49760738e-04   5.90064454e-05   4.95529549e-04   9.53593404e-03
   2.79861270e-03   6.43113842e-04   1.19840666e-03   2.74090575e-04
   2.64824114e-03   4.88022780e-04   0.00000000e+00   0.00000000e+00
   1.41866045e-03   1.54891741e-05   1.26470724e-02   4.26246634e-04
   6.61405564e-03   1.07136449e-04   1.69180268e-04   2.30016599e-03
   7.25052929e-04   1.96507216e-05   1.27333277e-03   1.13607972e-04
   8.25832318e-05   8.83424342e-05   1.74120532e-02   2.40779706e-02
   1.45737924e-04   7.98674493e-05   2.17853173e-05   7.76609336e-04
   4.59263235e-03   7.34065446e-03   1.67631054e-04   3.44164164e-05
   1.93881120e-04   0.00000000e+00   0.00000000e+00   0.00000000e+00
   1.08468041e-05   1.25652193e-03   1.92059578e-04   1.49188379e-03
   2.34975952e-03   3.04474337e-04   2.38915195e-03   5.52634710e-05
   7.71534189e-03   4.22366802e-03   1.26316590e-03   8.84033479e-04
   0.00000000e+00   1.80246208e-03   1.72486458e-04   0.00000000e+00
   0.00000000e+00   7.14194380e-03   1.86850775e-05   1.42167595e-02
   0.00000000e+00   7.61077485e-05   5.09562503e-04   1.75638772e-04
   0.00000000e+00   4.34832694e-04   5.75014978e-05   3.62932732e-04
   1.20685623e-04   6.86990484e-05   5.12865420e-05   2.26993003e-02
   7.14893988e-03   9.44927064e-05   6.65649457e-05   9.87457348e-05
   1.47004702e-03   5.31744210e-03   3.43983570e-03   2.17804508e-04
   8.63956129e-05   2.17274537e-04   1.94718581e-05   1.23778583e-05
   0.00000000e+00   9.12323252e-05   1.04929815e-03   4.61495392e-03
   1.44568685e-03   2.10642489e-03   2.51385574e-04   1.47303906e-04
   8.70309599e-06   1.50146620e-02   4.31344112e-03   7.35652331e-04
   1.04441811e-03   1.30404928e-04   3.31023716e-03   1.45298510e-03
   0.00000000e+00   0.00000000e+00   3.42955446e-02   1.17652919e-05
   4.47550598e-02   0.00000000e+00   6.54560645e-05   2.16505899e-04
   1.71072405e-04   0.00000000e+00   5.37195559e-04   0.00000000e+00
   4.78322228e-04   4.48786824e-03   6.36835002e-05   1.94415363e-04
   5.35995770e-02   3.16454703e-02   0.00000000e+00   1.19886101e-04
   0.00000000e+00   2.38217214e-03   8.07674159e-03   5.36105675e-03
   1.20442992e-04   3.90855970e-05   1.88628629e-04   0.00000000e+00
   4.27188001e-05   0.00000000e+00   1.04494735e-03   1.82994333e-03
   2.24121120e-04   1.38039907e-03   7.43092083e-04   1.18824902e-04
   1.41011080e-04   5.05005044e-04]
--------------------------------------------------
# Mapped to Column Name
     0.02716 	 age
     0.00954 	 antiguedad
     0.00388 	 canal_entrada
     0.00229 	 cod_prov
     0.0 	 conyuemp
     0.00658 	 fecha_alta
     0.00087 	 ind_actividad_cliente
     3e-05 	 ind_empleado
     0.00121 	 ind_nuevo
     0.00044 	 indext
     1e-05 	 indfall
     3e-05 	 indrel
     0.0 	 indrel_1mes
     1e-05 	 indresi
     0.00276 	 nomprov
     3e-05 	 pais_residencia
     0.00309 	 renta
     0.00761 	 segmento
     0.00093 	 sexo
     0.00105 	 tiprel_1mes
     4e-05 	 ult_fec_cli_1t
     0.00929 	 age_lag_one
     0.00311 	 antiguedad_lag_one
     0.03194 	 canal_entrada_lag_one
     0.00136 	 cod_prov_lag_one
     0.0 	 conyuemp_lag_one
     0.01195 	 fecha_alta_lag_one
     0.01447 	 ind_actividad_cliente_lag_one
     0.0 	 ind_ahor_fin_ult1_lag_one
     0.0 	 ind_aval_fin_ult1_lag_one
     0.05946 	 ind_cco_fin_ult1_lag_one
     3e-05 	 ind_cder_fin_ult1_lag_one
     0.01166 	 ind_cno_fin_ult1_lag_one
     0.0013 	 ind_ctju_fin_ult1_lag_one
     0.00354 	 ind_ctma_fin_ult1_lag_one
     0.00014 	 ind_ctop_fin_ult1_lag_one
     0.00019 	 ind_ctpp_fin_ult1_lag_one
     0.00076 	 ind_deco_fin_ult1_lag_one
     0.00033 	 ind_dela_fin_ult1_lag_one
     6e-05 	 ind_deme_fin_ult1_lag_one
     0.00048 	 ind_ecue_fin_ult1_lag_one
     0.0 	 ind_empleado_lag_one
     5e-05 	 ind_fond_fin_ult1_lag_one
     5e-05 	 ind_hip_fin_ult1_lag_one
     0.05197 	 ind_nom_pens_ult1_lag_one
     0.05148 	 ind_nomina_ult1_lag_one
     0.0001 	 ind_nuevo_lag_one
     5e-05 	 ind_plan_fin_ult1_lag_one
     0.0 	 ind_pres_fin_ult1_lag_one
     0.00246 	 ind_reca_fin_ult1_lag_one
     0.05087 	 ind_recibo_ult1_lag_one
     0.01596 	 ind_tjcr_fin_ult1_lag_one
     0.00023 	 ind_valo_fin_ult1_lag_one
     6e-05 	 ind_viv_fin_ult1_lag_one
     0.00018 	 indext_lag_one
     0.0 	 indfall_lag_one
     0.0 	 indrel_lag_one
     0.0 	 indrel_1mes_lag_one
     0.01066 	 indresi_lag_one
     0.00995 	 nomprov_lag_one
     0.00011 	 pais_residencia_lag_one
     0.00427 	 renta_lag_one
     0.00217 	 segmento_lag_one
     0.00047 	 sexo_lag_one
     0.00137 	 tiprel_1mes_lag_one
     0.00064 	 ult_fec_cli_1t_lag_one
     0.02242 	 age_lag_two
     0.00139 	 antiguedad_lag_two
     0.00238 	 canal_entrada_lag_two
     0.00127 	 cod_prov_lag_two
     0.00216 	 conyuemp_lag_two
     0.0057 	 fecha_alta_lag_two
     9e-05 	 ind_actividad_cliente_lag_two
     0.0 	 ind_ahor_fin_ult1_lag_two
     0.0 	 ind_aval_fin_ult1_lag_two
     0.01979 	 ind_cco_fin_ult1_lag_two
     0.0 	 ind_cder_fin_ult1_lag_two
     0.00285 	 ind_cno_fin_ult1_lag_two
     0.00098 	 ind_ctju_fin_ult1_lag_two
     0.00524 	 ind_ctma_fin_ult1_lag_two
     0.00015 	 ind_ctop_fin_ult1_lag_two
     9e-05 	 ind_ctpp_fin_ult1_lag_two
     0.00048 	 ind_deco_fin_ult1_lag_two
     0.00093 	 ind_dela_fin_ult1_lag_two
     1e-05 	 ind_deme_fin_ult1_lag_two
     0.00073 	 ind_ecue_fin_ult1_lag_two
     0.00028 	 ind_empleado_lag_two
     0.0001 	 ind_fond_fin_ult1_lag_two
     8e-05 	 ind_hip_fin_ult1_lag_two
     0.05339 	 ind_nom_pens_ult1_lag_two
     0.01019 	 ind_nomina_ult1_lag_two
     0.0001 	 ind_nuevo_lag_two
     3e-05 	 ind_plan_fin_ult1_lag_two
     2e-05 	 ind_pres_fin_ult1_lag_two
     0.00154 	 ind_reca_fin_ult1_lag_two
     0.00668 	 ind_recibo_ult1_lag_two
     0.00346 	 ind_tjcr_fin_ult1_lag_two
     0.00016 	 ind_valo_fin_ult1_lag_two
     3e-05 	 ind_viv_fin_ult1_lag_two
     8e-05 	 indext_lag_two
     0.0 	 indfall_lag_two
     0.0 	 indrel_lag_two
     1e-05 	 indrel_1mes_lag_two
     1e-05 	 indresi_lag_two
     0.00071 	 nomprov_lag_two
     0.0166 	 pais_residencia_lag_two
     0.00196 	 renta_lag_two
     0.00126 	 segmento_lag_two
     0.00015 	 sexo_lag_two
     6e-05 	 tiprel_1mes_lag_two
     0.0005 	 ult_fec_cli_1t_lag_two
     0.00954 	 age_lag_thr
     0.0028 	 antiguedad_lag_thr
     0.00064 	 canal_entrada_lag_thr
     0.0012 	 cod_prov_lag_thr
     0.00027 	 conyuemp_lag_thr
     0.00265 	 fecha_alta_lag_thr
     0.00049 	 ind_actividad_cliente_lag_thr
     0.0 	 ind_ahor_fin_ult1_lag_thr
     0.0 	 ind_aval_fin_ult1_lag_thr
     0.00142 	 ind_cco_fin_ult1_lag_thr
     2e-05 	 ind_cder_fin_ult1_lag_thr
     0.01265 	 ind_cno_fin_ult1_lag_thr
     0.00043 	 ind_ctju_fin_ult1_lag_thr
     0.00661 	 ind_ctma_fin_ult1_lag_thr
     0.00011 	 ind_ctop_fin_ult1_lag_thr
     0.00017 	 ind_ctpp_fin_ult1_lag_thr
     0.0023 	 ind_deco_fin_ult1_lag_thr
     0.00073 	 ind_dela_fin_ult1_lag_thr
     2e-05 	 ind_deme_fin_ult1_lag_thr
     0.00127 	 ind_ecue_fin_ult1_lag_thr
     0.00011 	 ind_empleado_lag_thr
     8e-05 	 ind_fond_fin_ult1_lag_thr
     9e-05 	 ind_hip_fin_ult1_lag_thr
     0.01741 	 ind_nom_pens_ult1_lag_thr
     0.02408 	 ind_nomina_ult1_lag_thr
     0.00015 	 ind_nuevo_lag_thr
     8e-05 	 ind_plan_fin_ult1_lag_thr
     2e-05 	 ind_pres_fin_ult1_lag_thr
     0.00078 	 ind_reca_fin_ult1_lag_thr
     0.00459 	 ind_recibo_ult1_lag_thr
     0.00734 	 ind_tjcr_fin_ult1_lag_thr
     0.00017 	 ind_valo_fin_ult1_lag_thr
     3e-05 	 ind_viv_fin_ult1_lag_thr
     0.00019 	 indext_lag_thr
     0.0 	 indfall_lag_thr
     0.0 	 indrel_lag_thr
     0.0 	 indrel_1mes_lag_thr
     1e-05 	 indresi_lag_thr
     0.00126 	 nomprov_lag_thr
     0.00019 	 pais_residencia_lag_thr
     0.00149 	 renta_lag_thr
     0.00235 	 segmento_lag_thr
     0.0003 	 sexo_lag_thr
     0.00239 	 tiprel_1mes_lag_thr
     6e-05 	 ult_fec_cli_1t_lag_thr
     0.00772 	 age_lag_fou
     0.00422 	 antiguedad_lag_fou
     0.00126 	 canal_entrada_lag_fou
     0.00088 	 cod_prov_lag_fou
     0.0 	 conyuemp_lag_fou
     0.0018 	 fecha_alta_lag_fou
     0.00017 	 ind_actividad_cliente_lag_fou
     0.0 	 ind_ahor_fin_ult1_lag_fou
     0.0 	 ind_aval_fin_ult1_lag_fou
     0.00714 	 ind_cco_fin_ult1_lag_fou
     2e-05 	 ind_cder_fin_ult1_lag_fou
     0.01422 	 ind_cno_fin_ult1_lag_fou
     0.0 	 ind_ctju_fin_ult1_lag_fou
     8e-05 	 ind_ctma_fin_ult1_lag_fou
     0.00051 	 ind_ctop_fin_ult1_lag_fou
     0.00018 	 ind_ctpp_fin_ult1_lag_fou
     0.0 	 ind_deco_fin_ult1_lag_fou
     0.00043 	 ind_dela_fin_ult1_lag_fou
     6e-05 	 ind_deme_fin_ult1_lag_fou
     0.00036 	 ind_ecue_fin_ult1_lag_fou
     0.00012 	 ind_empleado_lag_fou
     7e-05 	 ind_fond_fin_ult1_lag_fou
     5e-05 	 ind_hip_fin_ult1_lag_fou
     0.0227 	 ind_nom_pens_ult1_lag_fou
     0.00715 	 ind_nomina_ult1_lag_fou
     9e-05 	 ind_nuevo_lag_fou
     7e-05 	 ind_plan_fin_ult1_lag_fou
     0.0001 	 ind_pres_fin_ult1_lag_fou
     0.00147 	 ind_reca_fin_ult1_lag_fou
     0.00532 	 ind_recibo_ult1_lag_fou
     0.00344 	 ind_tjcr_fin_ult1_lag_fou
     0.00022 	 ind_valo_fin_ult1_lag_fou
     9e-05 	 ind_viv_fin_ult1_lag_fou
     0.00022 	 indext_lag_fou
     2e-05 	 indfall_lag_fou
     1e-05 	 indrel_lag_fou
     0.0 	 indrel_1mes_lag_fou
     9e-05 	 indresi_lag_fou
     0.00105 	 nomprov_lag_fou
     0.00461 	 pais_residencia_lag_fou
     0.00145 	 renta_lag_fou
     0.00211 	 segmento_lag_fou
     0.00025 	 sexo_lag_fou
     0.00015 	 tiprel_1mes_lag_fou
     1e-05 	 ult_fec_cli_1t_lag_fou
     0.01501 	 age_lag_fiv
     0.00431 	 antiguedad_lag_fiv
     0.00074 	 canal_entrada_lag_fiv
     0.00104 	 cod_prov_lag_fiv
     0.00013 	 conyuemp_lag_fiv
     0.00331 	 fecha_alta_lag_fiv
     0.00145 	 ind_actividad_cliente_lag_fiv
     0.0 	 ind_ahor_fin_ult1_lag_fiv
     0.0 	 ind_aval_fin_ult1_lag_fiv
     0.0343 	 ind_cco_fin_ult1_lag_fiv
     1e-05 	 ind_cder_fin_ult1_lag_fiv
     0.04476 	 ind_cno_fin_ult1_lag_fiv
     0.0 	 ind_ctju_fin_ult1_lag_fiv
     7e-05 	 ind_ctma_fin_ult1_lag_fiv
     0.00022 	 ind_ctop_fin_ult1_lag_fiv
     0.00017 	 ind_ctpp_fin_ult1_lag_fiv
     0.0 	 ind_deco_fin_ult1_lag_fiv
     0.00054 	 ind_dela_fin_ult1_lag_fiv
     0.0 	 ind_deme_fin_ult1_lag_fiv
     0.00048 	 ind_ecue_fin_ult1_lag_fiv
     0.00449 	 ind_empleado_lag_fiv
     6e-05 	 ind_fond_fin_ult1_lag_fiv
     0.00019 	 ind_hip_fin_ult1_lag_fiv
     0.0536 	 ind_nom_pens_ult1_lag_fiv
     0.03165 	 ind_nomina_ult1_lag_fiv
     0.0 	 ind_nuevo_lag_fiv
     0.00012 	 ind_plan_fin_ult1_lag_fiv
     0.0 	 ind_pres_fin_ult1_lag_fiv
     0.00238 	 ind_reca_fin_ult1_lag_fiv
     0.00808 	 ind_recibo_ult1_lag_fiv
     0.00536 	 ind_tjcr_fin_ult1_lag_fiv
     0.00012 	 ind_valo_fin_ult1_lag_fiv
     4e-05 	 ind_viv_fin_ult1_lag_fiv
     0.00019 	 indext_lag_fiv
     0.0 	 indfall_lag_fiv
     4e-05 	 indrel_lag_fiv
     0.0 	 indrel_1mes_lag_fiv
     0.00104 	 indresi_lag_fiv
     0.00183 	 nomprov_lag_fiv
     0.00022 	 pais_residencia_lag_fiv
     0.00138 	 renta_lag_fiv
     0.00074 	 segmento_lag_fiv
     0.00012 	 sexo_lag_fiv
     0.00014 	 tiprel_1mes_lag_fiv
     0.00051 	 ult_fec_cli_1t_lag_fiv
--------------------------------------------------
# Sorted Feature Importance
     0.05946 	 ind_cco_fin_ult1_lag_one
     0.0536 	 ind_nom_pens_ult1_lag_fiv
     0.05339 	 ind_nom_pens_ult1_lag_two
     0.05197 	 ind_nom_pens_ult1_lag_one
     0.05148 	 ind_nomina_ult1_lag_one
     0.05087 	 ind_recibo_ult1_lag_one
     0.04476 	 ind_cno_fin_ult1_lag_fiv
     0.0343 	 ind_cco_fin_ult1_lag_fiv
     0.03194 	 canal_entrada_lag_one
     0.03165 	 ind_nomina_ult1_lag_fiv
     0.02716 	 age
     0.02408 	 ind_nomina_ult1_lag_thr
     0.0227 	 ind_nom_pens_ult1_lag_fou
     0.02242 	 age_lag_two
     0.01979 	 ind_cco_fin_ult1_lag_two
     0.01741 	 ind_nom_pens_ult1_lag_thr
     0.0166 	 pais_residencia_lag_two
     0.01596 	 ind_tjcr_fin_ult1_lag_one
     0.01501 	 age_lag_fiv
     0.01447 	 ind_actividad_cliente_lag_one
     0.01422 	 ind_cno_fin_ult1_lag_fou
     0.01265 	 ind_cno_fin_ult1_lag_thr
     0.01195 	 fecha_alta_lag_one
     0.01166 	 ind_cno_fin_ult1_lag_one
     0.01066 	 indresi_lag_one
     0.01019 	 ind_nomina_ult1_lag_two
     0.00995 	 nomprov_lag_one
     0.00954 	 age_lag_thr
     0.00954 	 antiguedad
     0.00929 	 age_lag_one
     0.00808 	 ind_recibo_ult1_lag_fiv
     0.00772 	 age_lag_fou
     0.00761 	 segmento
     0.00734 	 ind_tjcr_fin_ult1_lag_thr
     0.00715 	 ind_nomina_ult1_lag_fou
     0.00714 	 ind_cco_fin_ult1_lag_fou
     0.00668 	 ind_recibo_ult1_lag_two
     0.00661 	 ind_ctma_fin_ult1_lag_thr
     0.00658 	 fecha_alta
     0.0057 	 fecha_alta_lag_two
     0.00536 	 ind_tjcr_fin_ult1_lag_fiv
     0.00532 	 ind_recibo_ult1_lag_fou
     0.00524 	 ind_ctma_fin_ult1_lag_two
     0.00461 	 pais_residencia_lag_fou
     0.00459 	 ind_recibo_ult1_lag_thr
     0.00449 	 ind_empleado_lag_fiv
     0.00431 	 antiguedad_lag_fiv
     0.00427 	 renta_lag_one
     0.00422 	 antiguedad_lag_fou
     0.00388 	 canal_entrada
     0.00354 	 ind_ctma_fin_ult1_lag_one
     0.00346 	 ind_tjcr_fin_ult1_lag_two
     0.00344 	 ind_tjcr_fin_ult1_lag_fou
     0.00331 	 fecha_alta_lag_fiv
     0.00311 	 antiguedad_lag_one
     0.00309 	 renta
     0.00285 	 ind_cno_fin_ult1_lag_two
     0.0028 	 antiguedad_lag_thr
     0.00276 	 nomprov
     0.00265 	 fecha_alta_lag_thr
     0.00246 	 ind_reca_fin_ult1_lag_one
     0.00239 	 tiprel_1mes_lag_thr
     0.00238 	 ind_reca_fin_ult1_lag_fiv
     0.00238 	 canal_entrada_lag_two
     0.00235 	 segmento_lag_thr
     0.0023 	 ind_deco_fin_ult1_lag_thr
     0.00229 	 cod_prov
     0.00217 	 segmento_lag_one
     0.00216 	 conyuemp_lag_two
     0.00211 	 segmento_lag_fou
     0.00196 	 renta_lag_two
     0.00183 	 nomprov_lag_fiv
     0.0018 	 fecha_alta_lag_fou
     0.00154 	 ind_reca_fin_ult1_lag_two
     0.00149 	 renta_lag_thr
     0.00147 	 ind_reca_fin_ult1_lag_fou
     0.00145 	 ind_actividad_cliente_lag_fiv
     0.00145 	 renta_lag_fou
     0.00142 	 ind_cco_fin_ult1_lag_thr
     0.00139 	 antiguedad_lag_two
     0.00138 	 renta_lag_fiv
     0.00137 	 tiprel_1mes_lag_one
     0.00136 	 cod_prov_lag_one
     0.0013 	 ind_ctju_fin_ult1_lag_one
     0.00127 	 ind_ecue_fin_ult1_lag_thr
     0.00127 	 cod_prov_lag_two
     0.00126 	 canal_entrada_lag_fou
     0.00126 	 segmento_lag_two
     0.00126 	 nomprov_lag_thr
     0.00121 	 ind_nuevo
     0.0012 	 cod_prov_lag_thr
     0.00105 	 nomprov_lag_fou
     0.00105 	 tiprel_1mes
     0.00104 	 indresi_lag_fiv
     0.00104 	 cod_prov_lag_fiv
     0.00098 	 ind_ctju_fin_ult1_lag_two
     0.00093 	 sexo
     0.00093 	 ind_dela_fin_ult1_lag_two
     0.00088 	 cod_prov_lag_fou
     0.00087 	 ind_actividad_cliente
     0.00078 	 ind_reca_fin_ult1_lag_thr
     0.00076 	 ind_deco_fin_ult1_lag_one
     0.00074 	 segmento_lag_fiv
     0.00074 	 canal_entrada_lag_fiv
     0.00073 	 ind_ecue_fin_ult1_lag_two
     0.00073 	 ind_dela_fin_ult1_lag_thr
     0.00071 	 nomprov_lag_two
     0.00064 	 canal_entrada_lag_thr
     0.00064 	 ult_fec_cli_1t_lag_one
     0.00054 	 ind_dela_fin_ult1_lag_fiv
     0.00051 	 ind_ctop_fin_ult1_lag_fou
     0.00051 	 ult_fec_cli_1t_lag_fiv
     0.0005 	 ult_fec_cli_1t_lag_two
     0.00049 	 ind_actividad_cliente_lag_thr
     0.00048 	 ind_ecue_fin_ult1_lag_one
     0.00048 	 ind_ecue_fin_ult1_lag_fiv
     0.00048 	 ind_deco_fin_ult1_lag_two
     0.00047 	 sexo_lag_one
     0.00044 	 indext
     0.00043 	 ind_dela_fin_ult1_lag_fou
     0.00043 	 ind_ctju_fin_ult1_lag_thr
     0.00036 	 ind_ecue_fin_ult1_lag_fou
     0.00033 	 ind_dela_fin_ult1_lag_one
     0.0003 	 sexo_lag_thr
     0.00028 	 ind_empleado_lag_two
     0.00027 	 conyuemp_lag_thr
     0.00025 	 sexo_lag_fou
     0.00023 	 ind_valo_fin_ult1_lag_one
     0.00022 	 pais_residencia_lag_fiv
     0.00022 	 ind_valo_fin_ult1_lag_fou
     0.00022 	 indext_lag_fou
     0.00022 	 ind_ctop_fin_ult1_lag_fiv
     0.00019 	 ind_hip_fin_ult1_lag_fiv
     0.00019 	 indext_lag_thr
     0.00019 	 pais_residencia_lag_thr
     0.00019 	 indext_lag_fiv
     0.00019 	 ind_ctpp_fin_ult1_lag_one
     0.00018 	 indext_lag_one
     0.00018 	 ind_ctpp_fin_ult1_lag_fou
     0.00017 	 ind_actividad_cliente_lag_fou
     0.00017 	 ind_ctpp_fin_ult1_lag_fiv
     0.00017 	 ind_ctpp_fin_ult1_lag_thr
     0.00017 	 ind_valo_fin_ult1_lag_thr
     0.00016 	 ind_valo_fin_ult1_lag_two
     0.00015 	 ind_ctop_fin_ult1_lag_two
     0.00015 	 sexo_lag_two
     0.00015 	 tiprel_1mes_lag_fou
     0.00015 	 ind_nuevo_lag_thr
     0.00014 	 ind_ctop_fin_ult1_lag_one
     0.00014 	 tiprel_1mes_lag_fiv
     0.00013 	 conyuemp_lag_fiv
     0.00012 	 ind_empleado_lag_fou
     0.00012 	 ind_valo_fin_ult1_lag_fiv
     0.00012 	 ind_plan_fin_ult1_lag_fiv
     0.00012 	 sexo_lag_fiv
     0.00011 	 ind_empleado_lag_thr
     0.00011 	 ind_ctop_fin_ult1_lag_thr
     0.00011 	 pais_residencia_lag_one
     0.0001 	 ind_nuevo_lag_one
     0.0001 	 ind_nuevo_lag_two
     0.0001 	 ind_pres_fin_ult1_lag_fou
     0.0001 	 ind_fond_fin_ult1_lag_two
     9e-05 	 ind_nuevo_lag_fou
     9e-05 	 ind_ctpp_fin_ult1_lag_two
     9e-05 	 indresi_lag_fou
     9e-05 	 ind_actividad_cliente_lag_two
     9e-05 	 ind_hip_fin_ult1_lag_thr
     9e-05 	 ind_viv_fin_ult1_lag_fou
     8e-05 	 indext_lag_two
     8e-05 	 ind_fond_fin_ult1_lag_thr
     8e-05 	 ind_hip_fin_ult1_lag_two
     8e-05 	 ind_plan_fin_ult1_lag_thr
     8e-05 	 ind_ctma_fin_ult1_lag_fou
     7e-05 	 ind_fond_fin_ult1_lag_fou
     7e-05 	 ind_plan_fin_ult1_lag_fou
     7e-05 	 ind_ctma_fin_ult1_lag_fiv
     6e-05 	 ind_fond_fin_ult1_lag_fiv
     6e-05 	 tiprel_1mes_lag_two
     6e-05 	 ind_deme_fin_ult1_lag_one
     6e-05 	 ind_deme_fin_ult1_lag_fou
     6e-05 	 ult_fec_cli_1t_lag_thr
     6e-05 	 ind_viv_fin_ult1_lag_one
     5e-05 	 ind_hip_fin_ult1_lag_fou
     5e-05 	 ind_plan_fin_ult1_lag_one
     5e-05 	 ind_hip_fin_ult1_lag_one
     5e-05 	 ind_fond_fin_ult1_lag_one
     4e-05 	 indrel_lag_fiv
     4e-05 	 ult_fec_cli_1t
     4e-05 	 ind_viv_fin_ult1_lag_fiv
     3e-05 	 ind_viv_fin_ult1_lag_thr
     3e-05 	 ind_plan_fin_ult1_lag_two
     3e-05 	 ind_cder_fin_ult1_lag_one
     3e-05 	 indrel
     3e-05 	 ind_empleado
     3e-05 	 ind_viv_fin_ult1_lag_two
     3e-05 	 pais_residencia
     2e-05 	 ind_pres_fin_ult1_lag_thr
     2e-05 	 ind_deme_fin_ult1_lag_thr
     2e-05 	 indfall_lag_fou
     2e-05 	 ind_cder_fin_ult1_lag_fou
     2e-05 	 ind_pres_fin_ult1_lag_two
     2e-05 	 ind_cder_fin_ult1_lag_thr
     1e-05 	 ind_deme_fin_ult1_lag_two
     1e-05 	 indfall
     1e-05 	 indrel_lag_fou
     1e-05 	 ind_cder_fin_ult1_lag_fiv
     1e-05 	 indresi_lag_thr
     1e-05 	 indresi
     1e-05 	 ult_fec_cli_1t_lag_fou
     1e-05 	 indresi_lag_two
     1e-05 	 indrel_1mes_lag_two
     0.0 	 indrel_1mes
     0.0 	 ind_ahor_fin_ult1_lag_one
     0.0 	 ind_empleado_lag_one
     0.0 	 indrel_lag_one
     0.0 	 indrel_1mes_lag_thr
     0.0 	 ind_pres_fin_ult1_lag_one
     0.0 	 indfall_lag_two
     0.0 	 ind_aval_fin_ult1_lag_one
     0.0 	 indfall_lag_fiv
     0.0 	 ind_deme_fin_ult1_lag_fiv
     0.0 	 conyuemp_lag_one
     0.0 	 ind_ahor_fin_ult1_lag_two
     0.0 	 ind_ahor_fin_ult1_lag_fou
     0.0 	 ind_ctju_fin_ult1_lag_fou
     0.0 	 ind_cder_fin_ult1_lag_two
     0.0 	 ind_aval_fin_ult1_lag_fiv
     0.0 	 indrel_1mes_lag_fou
     0.0 	 conyuemp
     0.0 	 ind_aval_fin_ult1_lag_fou
     0.0 	 indrel_lag_thr
     0.0 	 indrel_lag_two
     0.0 	 indfall_lag_thr
     0.0 	 conyuemp_lag_fou
     0.0 	 ind_pres_fin_ult1_lag_fiv
     0.0 	 ind_ahor_fin_ult1_lag_fiv
     0.0 	 ind_deco_fin_ult1_lag_fiv
     0.0 	 ind_ctju_fin_ult1_lag_fiv
     0.0 	 indrel_1mes_lag_fiv
     0.0 	 ind_nuevo_lag_fiv
     0.0 	 indrel_1mes_lag_one
     0.0 	 ind_aval_fin_ult1_lag_thr
     0.0 	 ind_ahor_fin_ult1_lag_thr
     0.0 	 indfall_lag_one
     0.0 	 ind_deco_fin_ult1_lag_fou
     0.0 	 ind_aval_fin_ult1_lag_two

In [24]:
# 주요 변수 시각화
plot_fimp(rf_fimp)



In [25]:
# 모델 상세 보기
et_fimp = observe_model_tree(trn, et_model)


==================================================
ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=10, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=-1, oob_score=False, random_state=777,
           verbose=0, warm_start=False)
==================================================
# Feature Importance
[  5.94683527e-03   1.39051943e-03   2.29888618e-03   5.07432305e-04
   3.87193509e-06   2.40474380e-03   8.72570150e-04   4.08645489e-05
   6.83559602e-03   2.99270510e-04   0.00000000e+00   4.46397330e-05
   3.27418070e-07   0.00000000e+00   3.83032823e-04   0.00000000e+00
   5.90506420e-04   4.35783654e-03   4.53010078e-04   1.91290467e-03
   4.52790009e-05   1.11181433e-02   8.84057974e-04   1.93381305e-03
   4.37125676e-04   1.87721515e-04   7.63249072e-04   2.27509835e-02
   9.65003318e-06   0.00000000e+00   8.29796439e-02   1.02106791e-05
   1.26656593e-02   1.05263531e-03   3.63365079e-04   2.43360101e-04
   1.78176511e-04   9.25012809e-04   7.15644145e-04   3.08925029e-05
   1.69805679e-03   7.96031150e-05   1.66352789e-04   9.02045124e-05
   3.97708172e-02   5.57928547e-02   4.21496401e-03   1.01873900e-04
   7.05557292e-05   3.05297940e-03   8.66295221e-02   1.33354721e-02
   1.13327551e-04   4.89673187e-05   2.76497031e-04   7.06924577e-06
   0.00000000e+00   1.05123901e-05   2.38617541e-02   6.44182315e-04
   1.56211092e-04   4.16962600e-04   1.10565233e-03   3.18128454e-03
   1.09144814e-04   5.49184375e-04   2.79544474e-03   2.92380948e-03
   1.89530078e-03   3.49917125e-04   1.20665891e-05   9.90510567e-04
   1.59937333e-03   0.00000000e+00   5.50751343e-06   3.20254976e-02
   4.42078304e-05   1.22853142e-02   1.81148951e-04   7.79964842e-03
   3.34286484e-04   2.96086147e-04   2.07153088e-03   9.47022508e-04
   3.32704948e-05   1.31446119e-03   1.33203270e-04   2.28632379e-04
   7.75617457e-05   6.05736826e-02   3.63015790e-02   5.33921079e-04
   1.05935941e-04   1.08677903e-05   1.67766781e-03   1.42805455e-02
   6.15180269e-03   1.31639018e-04   6.57226628e-05   1.48221593e-04
   0.00000000e+00   1.82646283e-05   6.06074697e-06   5.92231320e-04
   5.22779113e-04   7.63872108e-04   2.80244491e-04   9.66878980e-04
   2.73293859e-04   6.28633185e-04   1.33143909e-04   3.44422615e-03
   1.44653519e-03   5.97602081e-04   4.51979104e-04   6.18386914e-03
   1.57390987e-03   1.32559895e-04   1.08053942e-05   0.00000000e+00
   1.51224828e-02   7.71709161e-06   1.22816551e-02   1.45439268e-04
   5.04304903e-03   3.38664041e-04   3.43208888e-04   1.31516455e-03
   9.31178721e-04   5.14782029e-05   7.45369027e-04   4.56742341e-03
   1.26450613e-04   1.00504804e-04   9.99462315e-03   3.84530985e-02
   3.81421014e-04   1.30235417e-04   3.29201075e-05   1.24650916e-03
   1.42451180e-02   1.00043204e-02   2.34100961e-04   4.95725061e-05
   2.12552349e-04   0.00000000e+00   3.01857822e-05   0.00000000e+00
   8.32300718e-04   3.77531586e-04   6.08992402e-05   5.08128591e-04
   5.88430350e-03   9.39081636e-04   3.25751965e-04   2.98181786e-04
   1.37808804e-02   9.91268180e-04   1.35351212e-03   3.23399465e-04
   6.44295893e-03   1.65739700e-03   8.20441976e-03   0.00000000e+00
   0.00000000e+00   3.06309336e-02   8.25924023e-06   3.18134895e-03
   8.67946778e-06   6.85203330e-04   3.73056050e-04   1.73695990e-04
   4.32569685e-05   5.84665307e-04   5.29124787e-05   6.13953338e-04
   1.01684199e-03   8.99434507e-05   1.28932273e-04   5.48788521e-02
   1.45795158e-02   7.39060226e-05   5.11150381e-05   2.69028286e-05
   4.16871749e-03   6.59239824e-03   4.01351956e-03   1.67177349e-04
   1.98510367e-05   1.70674795e-04   8.37525322e-06   2.95497710e-05
   0.00000000e+00   8.58729569e-05   4.68033344e-04   6.59809315e-05
   4.24940726e-04   2.25743820e-03   2.24763234e-04   2.16985636e-04
   1.79903849e-04   1.17617119e-03   2.11804317e-03   3.16675304e-04
   8.56579745e-04   5.09347852e-04   1.53219712e-03   2.32602313e-04
   0.00000000e+00   1.34513978e-05   1.38236627e-02   9.32559276e-06
   8.65281211e-03   1.38911269e-05   6.46689807e-04   1.84414324e-04
   1.54305300e-04   1.59900099e-05   3.74911111e-04   2.57333216e-05
   1.03722303e-03   1.28685434e-03   6.21588944e-05   8.76462499e-05
   3.59018353e-02   8.39748979e-03   0.00000000e+00   1.57041672e-04
   6.41317887e-05   2.11620612e-03   1.26511837e-02   7.64334816e-03
   1.82044811e-04   6.14368269e-06   1.29966498e-04   0.00000000e+00
   3.49913145e-05   0.00000000e+00   1.94945951e-04   3.36763838e-04
   2.79659833e-04   3.09219807e-04   7.55410961e-04   1.89659867e-04
   4.85857252e-03   7.76329668e-04]
--------------------------------------------------
# Mapped to Column Name
     0.00595 	 age
     0.00139 	 antiguedad
     0.0023 	 canal_entrada
     0.00051 	 cod_prov
     0.0 	 conyuemp
     0.0024 	 fecha_alta
     0.00087 	 ind_actividad_cliente
     4e-05 	 ind_empleado
     0.00684 	 ind_nuevo
     0.0003 	 indext
     0.0 	 indfall
     4e-05 	 indrel
     0.0 	 indrel_1mes
     0.0 	 indresi
     0.00038 	 nomprov
     0.0 	 pais_residencia
     0.00059 	 renta
     0.00436 	 segmento
     0.00045 	 sexo
     0.00191 	 tiprel_1mes
     5e-05 	 ult_fec_cli_1t
     0.01112 	 age_lag_one
     0.00088 	 antiguedad_lag_one
     0.00193 	 canal_entrada_lag_one
     0.00044 	 cod_prov_lag_one
     0.00019 	 conyuemp_lag_one
     0.00076 	 fecha_alta_lag_one
     0.02275 	 ind_actividad_cliente_lag_one
     1e-05 	 ind_ahor_fin_ult1_lag_one
     0.0 	 ind_aval_fin_ult1_lag_one
     0.08298 	 ind_cco_fin_ult1_lag_one
     1e-05 	 ind_cder_fin_ult1_lag_one
     0.01267 	 ind_cno_fin_ult1_lag_one
     0.00105 	 ind_ctju_fin_ult1_lag_one
     0.00036 	 ind_ctma_fin_ult1_lag_one
     0.00024 	 ind_ctop_fin_ult1_lag_one
     0.00018 	 ind_ctpp_fin_ult1_lag_one
     0.00093 	 ind_deco_fin_ult1_lag_one
     0.00072 	 ind_dela_fin_ult1_lag_one
     3e-05 	 ind_deme_fin_ult1_lag_one
     0.0017 	 ind_ecue_fin_ult1_lag_one
     8e-05 	 ind_empleado_lag_one
     0.00017 	 ind_fond_fin_ult1_lag_one
     9e-05 	 ind_hip_fin_ult1_lag_one
     0.03977 	 ind_nom_pens_ult1_lag_one
     0.05579 	 ind_nomina_ult1_lag_one
     0.00421 	 ind_nuevo_lag_one
     0.0001 	 ind_plan_fin_ult1_lag_one
     7e-05 	 ind_pres_fin_ult1_lag_one
     0.00305 	 ind_reca_fin_ult1_lag_one
     0.08663 	 ind_recibo_ult1_lag_one
     0.01334 	 ind_tjcr_fin_ult1_lag_one
     0.00011 	 ind_valo_fin_ult1_lag_one
     5e-05 	 ind_viv_fin_ult1_lag_one
     0.00028 	 indext_lag_one
     1e-05 	 indfall_lag_one
     0.0 	 indrel_lag_one
     1e-05 	 indrel_1mes_lag_one
     0.02386 	 indresi_lag_one
     0.00064 	 nomprov_lag_one
     0.00016 	 pais_residencia_lag_one
     0.00042 	 renta_lag_one
     0.00111 	 segmento_lag_one
     0.00318 	 sexo_lag_one
     0.00011 	 tiprel_1mes_lag_one
     0.00055 	 ult_fec_cli_1t_lag_one
     0.0028 	 age_lag_two
     0.00292 	 antiguedad_lag_two
     0.0019 	 canal_entrada_lag_two
     0.00035 	 cod_prov_lag_two
     1e-05 	 conyuemp_lag_two
     0.00099 	 fecha_alta_lag_two
     0.0016 	 ind_actividad_cliente_lag_two
     0.0 	 ind_ahor_fin_ult1_lag_two
     1e-05 	 ind_aval_fin_ult1_lag_two
     0.03203 	 ind_cco_fin_ult1_lag_two
     4e-05 	 ind_cder_fin_ult1_lag_two
     0.01229 	 ind_cno_fin_ult1_lag_two
     0.00018 	 ind_ctju_fin_ult1_lag_two
     0.0078 	 ind_ctma_fin_ult1_lag_two
     0.00033 	 ind_ctop_fin_ult1_lag_two
     0.0003 	 ind_ctpp_fin_ult1_lag_two
     0.00207 	 ind_deco_fin_ult1_lag_two
     0.00095 	 ind_dela_fin_ult1_lag_two
     3e-05 	 ind_deme_fin_ult1_lag_two
     0.00131 	 ind_ecue_fin_ult1_lag_two
     0.00013 	 ind_empleado_lag_two
     0.00023 	 ind_fond_fin_ult1_lag_two
     8e-05 	 ind_hip_fin_ult1_lag_two
     0.06057 	 ind_nom_pens_ult1_lag_two
     0.0363 	 ind_nomina_ult1_lag_two
     0.00053 	 ind_nuevo_lag_two
     0.00011 	 ind_plan_fin_ult1_lag_two
     1e-05 	 ind_pres_fin_ult1_lag_two
     0.00168 	 ind_reca_fin_ult1_lag_two
     0.01428 	 ind_recibo_ult1_lag_two
     0.00615 	 ind_tjcr_fin_ult1_lag_two
     0.00013 	 ind_valo_fin_ult1_lag_two
     7e-05 	 ind_viv_fin_ult1_lag_two
     0.00015 	 indext_lag_two
     0.0 	 indfall_lag_two
     2e-05 	 indrel_lag_two
     1e-05 	 indrel_1mes_lag_two
     0.00059 	 indresi_lag_two
     0.00052 	 nomprov_lag_two
     0.00076 	 pais_residencia_lag_two
     0.00028 	 renta_lag_two
     0.00097 	 segmento_lag_two
     0.00027 	 sexo_lag_two
     0.00063 	 tiprel_1mes_lag_two
     0.00013 	 ult_fec_cli_1t_lag_two
     0.00344 	 age_lag_thr
     0.00145 	 antiguedad_lag_thr
     0.0006 	 canal_entrada_lag_thr
     0.00045 	 cod_prov_lag_thr
     0.00618 	 conyuemp_lag_thr
     0.00157 	 fecha_alta_lag_thr
     0.00013 	 ind_actividad_cliente_lag_thr
     1e-05 	 ind_ahor_fin_ult1_lag_thr
     0.0 	 ind_aval_fin_ult1_lag_thr
     0.01512 	 ind_cco_fin_ult1_lag_thr
     1e-05 	 ind_cder_fin_ult1_lag_thr
     0.01228 	 ind_cno_fin_ult1_lag_thr
     0.00015 	 ind_ctju_fin_ult1_lag_thr
     0.00504 	 ind_ctma_fin_ult1_lag_thr
     0.00034 	 ind_ctop_fin_ult1_lag_thr
     0.00034 	 ind_ctpp_fin_ult1_lag_thr
     0.00132 	 ind_deco_fin_ult1_lag_thr
     0.00093 	 ind_dela_fin_ult1_lag_thr
     5e-05 	 ind_deme_fin_ult1_lag_thr
     0.00075 	 ind_ecue_fin_ult1_lag_thr
     0.00457 	 ind_empleado_lag_thr
     0.00013 	 ind_fond_fin_ult1_lag_thr
     0.0001 	 ind_hip_fin_ult1_lag_thr
     0.00999 	 ind_nom_pens_ult1_lag_thr
     0.03845 	 ind_nomina_ult1_lag_thr
     0.00038 	 ind_nuevo_lag_thr
     0.00013 	 ind_plan_fin_ult1_lag_thr
     3e-05 	 ind_pres_fin_ult1_lag_thr
     0.00125 	 ind_reca_fin_ult1_lag_thr
     0.01425 	 ind_recibo_ult1_lag_thr
     0.01 	 ind_tjcr_fin_ult1_lag_thr
     0.00023 	 ind_valo_fin_ult1_lag_thr
     5e-05 	 ind_viv_fin_ult1_lag_thr
     0.00021 	 indext_lag_thr
     0.0 	 indfall_lag_thr
     3e-05 	 indrel_lag_thr
     0.0 	 indrel_1mes_lag_thr
     0.00083 	 indresi_lag_thr
     0.00038 	 nomprov_lag_thr
     6e-05 	 pais_residencia_lag_thr
     0.00051 	 renta_lag_thr
     0.00588 	 segmento_lag_thr
     0.00094 	 sexo_lag_thr
     0.00033 	 tiprel_1mes_lag_thr
     0.0003 	 ult_fec_cli_1t_lag_thr
     0.01378 	 age_lag_fou
     0.00099 	 antiguedad_lag_fou
     0.00135 	 canal_entrada_lag_fou
     0.00032 	 cod_prov_lag_fou
     0.00644 	 conyuemp_lag_fou
     0.00166 	 fecha_alta_lag_fou
     0.0082 	 ind_actividad_cliente_lag_fou
     0.0 	 ind_ahor_fin_ult1_lag_fou
     0.0 	 ind_aval_fin_ult1_lag_fou
     0.03063 	 ind_cco_fin_ult1_lag_fou
     1e-05 	 ind_cder_fin_ult1_lag_fou
     0.00318 	 ind_cno_fin_ult1_lag_fou
     1e-05 	 ind_ctju_fin_ult1_lag_fou
     0.00069 	 ind_ctma_fin_ult1_lag_fou
     0.00037 	 ind_ctop_fin_ult1_lag_fou
     0.00017 	 ind_ctpp_fin_ult1_lag_fou
     4e-05 	 ind_deco_fin_ult1_lag_fou
     0.00058 	 ind_dela_fin_ult1_lag_fou
     5e-05 	 ind_deme_fin_ult1_lag_fou
     0.00061 	 ind_ecue_fin_ult1_lag_fou
     0.00102 	 ind_empleado_lag_fou
     9e-05 	 ind_fond_fin_ult1_lag_fou
     0.00013 	 ind_hip_fin_ult1_lag_fou
     0.05488 	 ind_nom_pens_ult1_lag_fou
     0.01458 	 ind_nomina_ult1_lag_fou
     7e-05 	 ind_nuevo_lag_fou
     5e-05 	 ind_plan_fin_ult1_lag_fou
     3e-05 	 ind_pres_fin_ult1_lag_fou
     0.00417 	 ind_reca_fin_ult1_lag_fou
     0.00659 	 ind_recibo_ult1_lag_fou
     0.00401 	 ind_tjcr_fin_ult1_lag_fou
     0.00017 	 ind_valo_fin_ult1_lag_fou
     2e-05 	 ind_viv_fin_ult1_lag_fou
     0.00017 	 indext_lag_fou
     1e-05 	 indfall_lag_fou
     3e-05 	 indrel_lag_fou
     0.0 	 indrel_1mes_lag_fou
     9e-05 	 indresi_lag_fou
     0.00047 	 nomprov_lag_fou
     7e-05 	 pais_residencia_lag_fou
     0.00042 	 renta_lag_fou
     0.00226 	 segmento_lag_fou
     0.00022 	 sexo_lag_fou
     0.00022 	 tiprel_1mes_lag_fou
     0.00018 	 ult_fec_cli_1t_lag_fou
     0.00118 	 age_lag_fiv
     0.00212 	 antiguedad_lag_fiv
     0.00032 	 canal_entrada_lag_fiv
     0.00086 	 cod_prov_lag_fiv
     0.00051 	 conyuemp_lag_fiv
     0.00153 	 fecha_alta_lag_fiv
     0.00023 	 ind_actividad_cliente_lag_fiv
     0.0 	 ind_ahor_fin_ult1_lag_fiv
     1e-05 	 ind_aval_fin_ult1_lag_fiv
     0.01382 	 ind_cco_fin_ult1_lag_fiv
     1e-05 	 ind_cder_fin_ult1_lag_fiv
     0.00865 	 ind_cno_fin_ult1_lag_fiv
     1e-05 	 ind_ctju_fin_ult1_lag_fiv
     0.00065 	 ind_ctma_fin_ult1_lag_fiv
     0.00018 	 ind_ctop_fin_ult1_lag_fiv
     0.00015 	 ind_ctpp_fin_ult1_lag_fiv
     2e-05 	 ind_deco_fin_ult1_lag_fiv
     0.00037 	 ind_dela_fin_ult1_lag_fiv
     3e-05 	 ind_deme_fin_ult1_lag_fiv
     0.00104 	 ind_ecue_fin_ult1_lag_fiv
     0.00129 	 ind_empleado_lag_fiv
     6e-05 	 ind_fond_fin_ult1_lag_fiv
     9e-05 	 ind_hip_fin_ult1_lag_fiv
     0.0359 	 ind_nom_pens_ult1_lag_fiv
     0.0084 	 ind_nomina_ult1_lag_fiv
     0.0 	 ind_nuevo_lag_fiv
     0.00016 	 ind_plan_fin_ult1_lag_fiv
     6e-05 	 ind_pres_fin_ult1_lag_fiv
     0.00212 	 ind_reca_fin_ult1_lag_fiv
     0.01265 	 ind_recibo_ult1_lag_fiv
     0.00764 	 ind_tjcr_fin_ult1_lag_fiv
     0.00018 	 ind_valo_fin_ult1_lag_fiv
     1e-05 	 ind_viv_fin_ult1_lag_fiv
     0.00013 	 indext_lag_fiv
     0.0 	 indfall_lag_fiv
     3e-05 	 indrel_lag_fiv
     0.0 	 indrel_1mes_lag_fiv
     0.00019 	 indresi_lag_fiv
     0.00034 	 nomprov_lag_fiv
     0.00028 	 pais_residencia_lag_fiv
     0.00031 	 renta_lag_fiv
     0.00076 	 segmento_lag_fiv
     0.00019 	 sexo_lag_fiv
     0.00486 	 tiprel_1mes_lag_fiv
     0.00078 	 ult_fec_cli_1t_lag_fiv
--------------------------------------------------
# Sorted Feature Importance
     0.08663 	 ind_recibo_ult1_lag_one
     0.08298 	 ind_cco_fin_ult1_lag_one
     0.06057 	 ind_nom_pens_ult1_lag_two
     0.05579 	 ind_nomina_ult1_lag_one
     0.05488 	 ind_nom_pens_ult1_lag_fou
     0.03977 	 ind_nom_pens_ult1_lag_one
     0.03845 	 ind_nomina_ult1_lag_thr
     0.0363 	 ind_nomina_ult1_lag_two
     0.0359 	 ind_nom_pens_ult1_lag_fiv
     0.03203 	 ind_cco_fin_ult1_lag_two
     0.03063 	 ind_cco_fin_ult1_lag_fou
     0.02386 	 indresi_lag_one
     0.02275 	 ind_actividad_cliente_lag_one
     0.01512 	 ind_cco_fin_ult1_lag_thr
     0.01458 	 ind_nomina_ult1_lag_fou
     0.01428 	 ind_recibo_ult1_lag_two
     0.01425 	 ind_recibo_ult1_lag_thr
     0.01382 	 ind_cco_fin_ult1_lag_fiv
     0.01378 	 age_lag_fou
     0.01334 	 ind_tjcr_fin_ult1_lag_one
     0.01267 	 ind_cno_fin_ult1_lag_one
     0.01265 	 ind_recibo_ult1_lag_fiv
     0.01229 	 ind_cno_fin_ult1_lag_two
     0.01228 	 ind_cno_fin_ult1_lag_thr
     0.01112 	 age_lag_one
     0.01 	 ind_tjcr_fin_ult1_lag_thr
     0.00999 	 ind_nom_pens_ult1_lag_thr
     0.00865 	 ind_cno_fin_ult1_lag_fiv
     0.0084 	 ind_nomina_ult1_lag_fiv
     0.0082 	 ind_actividad_cliente_lag_fou
     0.0078 	 ind_ctma_fin_ult1_lag_two
     0.00764 	 ind_tjcr_fin_ult1_lag_fiv
     0.00684 	 ind_nuevo
     0.00659 	 ind_recibo_ult1_lag_fou
     0.00644 	 conyuemp_lag_fou
     0.00618 	 conyuemp_lag_thr
     0.00615 	 ind_tjcr_fin_ult1_lag_two
     0.00595 	 age
     0.00588 	 segmento_lag_thr
     0.00504 	 ind_ctma_fin_ult1_lag_thr
     0.00486 	 tiprel_1mes_lag_fiv
     0.00457 	 ind_empleado_lag_thr
     0.00436 	 segmento
     0.00421 	 ind_nuevo_lag_one
     0.00417 	 ind_reca_fin_ult1_lag_fou
     0.00401 	 ind_tjcr_fin_ult1_lag_fou
     0.00344 	 age_lag_thr
     0.00318 	 ind_cno_fin_ult1_lag_fou
     0.00318 	 sexo_lag_one
     0.00305 	 ind_reca_fin_ult1_lag_one
     0.00292 	 antiguedad_lag_two
     0.0028 	 age_lag_two
     0.0024 	 fecha_alta
     0.0023 	 canal_entrada
     0.00226 	 segmento_lag_fou
     0.00212 	 antiguedad_lag_fiv
     0.00212 	 ind_reca_fin_ult1_lag_fiv
     0.00207 	 ind_deco_fin_ult1_lag_two
     0.00193 	 canal_entrada_lag_one
     0.00191 	 tiprel_1mes
     0.0019 	 canal_entrada_lag_two
     0.0017 	 ind_ecue_fin_ult1_lag_one
     0.00168 	 ind_reca_fin_ult1_lag_two
     0.00166 	 fecha_alta_lag_fou
     0.0016 	 ind_actividad_cliente_lag_two
     0.00157 	 fecha_alta_lag_thr
     0.00153 	 fecha_alta_lag_fiv
     0.00145 	 antiguedad_lag_thr
     0.00139 	 antiguedad
     0.00135 	 canal_entrada_lag_fou
     0.00132 	 ind_deco_fin_ult1_lag_thr
     0.00131 	 ind_ecue_fin_ult1_lag_two
     0.00129 	 ind_empleado_lag_fiv
     0.00125 	 ind_reca_fin_ult1_lag_thr
     0.00118 	 age_lag_fiv
     0.00111 	 segmento_lag_one
     0.00105 	 ind_ctju_fin_ult1_lag_one
     0.00104 	 ind_ecue_fin_ult1_lag_fiv
     0.00102 	 ind_empleado_lag_fou
     0.00099 	 antiguedad_lag_fou
     0.00099 	 fecha_alta_lag_two
     0.00097 	 segmento_lag_two
     0.00095 	 ind_dela_fin_ult1_lag_two
     0.00094 	 sexo_lag_thr
     0.00093 	 ind_dela_fin_ult1_lag_thr
     0.00093 	 ind_deco_fin_ult1_lag_one
     0.00088 	 antiguedad_lag_one
     0.00087 	 ind_actividad_cliente
     0.00086 	 cod_prov_lag_fiv
     0.00083 	 indresi_lag_thr
     0.00078 	 ult_fec_cli_1t_lag_fiv
     0.00076 	 pais_residencia_lag_two
     0.00076 	 fecha_alta_lag_one
     0.00076 	 segmento_lag_fiv
     0.00075 	 ind_ecue_fin_ult1_lag_thr
     0.00072 	 ind_dela_fin_ult1_lag_one
     0.00069 	 ind_ctma_fin_ult1_lag_fou
     0.00065 	 ind_ctma_fin_ult1_lag_fiv
     0.00064 	 nomprov_lag_one
     0.00063 	 tiprel_1mes_lag_two
     0.00061 	 ind_ecue_fin_ult1_lag_fou
     0.0006 	 canal_entrada_lag_thr
     0.00059 	 indresi_lag_two
     0.00059 	 renta
     0.00058 	 ind_dela_fin_ult1_lag_fou
     0.00055 	 ult_fec_cli_1t_lag_one
     0.00053 	 ind_nuevo_lag_two
     0.00052 	 nomprov_lag_two
     0.00051 	 conyuemp_lag_fiv
     0.00051 	 renta_lag_thr
     0.00051 	 cod_prov
     0.00047 	 nomprov_lag_fou
     0.00045 	 sexo
     0.00045 	 cod_prov_lag_thr
     0.00044 	 cod_prov_lag_one
     0.00042 	 renta_lag_fou
     0.00042 	 renta_lag_one
     0.00038 	 nomprov
     0.00038 	 ind_nuevo_lag_thr
     0.00038 	 nomprov_lag_thr
     0.00037 	 ind_dela_fin_ult1_lag_fiv
     0.00037 	 ind_ctop_fin_ult1_lag_fou
     0.00036 	 ind_ctma_fin_ult1_lag_one
     0.00035 	 cod_prov_lag_two
     0.00034 	 ind_ctpp_fin_ult1_lag_thr
     0.00034 	 ind_ctop_fin_ult1_lag_thr
     0.00034 	 nomprov_lag_fiv
     0.00033 	 ind_ctop_fin_ult1_lag_two
     0.00033 	 tiprel_1mes_lag_thr
     0.00032 	 cod_prov_lag_fou
     0.00032 	 canal_entrada_lag_fiv
     0.00031 	 renta_lag_fiv
     0.0003 	 indext
     0.0003 	 ult_fec_cli_1t_lag_thr
     0.0003 	 ind_ctpp_fin_ult1_lag_two
     0.00028 	 renta_lag_two
     0.00028 	 pais_residencia_lag_fiv
     0.00028 	 indext_lag_one
     0.00027 	 sexo_lag_two
     0.00024 	 ind_ctop_fin_ult1_lag_one
     0.00023 	 ind_valo_fin_ult1_lag_thr
     0.00023 	 ind_actividad_cliente_lag_fiv
     0.00023 	 ind_fond_fin_ult1_lag_two
     0.00022 	 sexo_lag_fou
     0.00022 	 tiprel_1mes_lag_fou
     0.00021 	 indext_lag_thr
     0.00019 	 indresi_lag_fiv
     0.00019 	 sexo_lag_fiv
     0.00019 	 conyuemp_lag_one
     0.00018 	 ind_ctop_fin_ult1_lag_fiv
     0.00018 	 ind_valo_fin_ult1_lag_fiv
     0.00018 	 ind_ctju_fin_ult1_lag_two
     0.00018 	 ult_fec_cli_1t_lag_fou
     0.00018 	 ind_ctpp_fin_ult1_lag_one
     0.00017 	 ind_ctpp_fin_ult1_lag_fou
     0.00017 	 indext_lag_fou
     0.00017 	 ind_valo_fin_ult1_lag_fou
     0.00017 	 ind_fond_fin_ult1_lag_one
     0.00016 	 ind_plan_fin_ult1_lag_fiv
     0.00016 	 pais_residencia_lag_one
     0.00015 	 ind_ctpp_fin_ult1_lag_fiv
     0.00015 	 indext_lag_two
     0.00015 	 ind_ctju_fin_ult1_lag_thr
     0.00013 	 ind_empleado_lag_two
     0.00013 	 ult_fec_cli_1t_lag_two
     0.00013 	 ind_actividad_cliente_lag_thr
     0.00013 	 ind_valo_fin_ult1_lag_two
     0.00013 	 ind_plan_fin_ult1_lag_thr
     0.00013 	 indext_lag_fiv
     0.00013 	 ind_hip_fin_ult1_lag_fou
     0.00013 	 ind_fond_fin_ult1_lag_thr
     0.00011 	 ind_valo_fin_ult1_lag_one
     0.00011 	 tiprel_1mes_lag_one
     0.00011 	 ind_plan_fin_ult1_lag_two
     0.0001 	 ind_plan_fin_ult1_lag_one
     0.0001 	 ind_hip_fin_ult1_lag_thr
     9e-05 	 ind_hip_fin_ult1_lag_one
     9e-05 	 ind_fond_fin_ult1_lag_fou
     9e-05 	 ind_hip_fin_ult1_lag_fiv
     9e-05 	 indresi_lag_fou
     8e-05 	 ind_empleado_lag_one
     8e-05 	 ind_hip_fin_ult1_lag_two
     7e-05 	 ind_nuevo_lag_fou
     7e-05 	 ind_pres_fin_ult1_lag_one
     7e-05 	 pais_residencia_lag_fou
     7e-05 	 ind_viv_fin_ult1_lag_two
     6e-05 	 ind_pres_fin_ult1_lag_fiv
     6e-05 	 ind_fond_fin_ult1_lag_fiv
     6e-05 	 pais_residencia_lag_thr
     5e-05 	 ind_deme_fin_ult1_lag_fou
     5e-05 	 ind_deme_fin_ult1_lag_thr
     5e-05 	 ind_plan_fin_ult1_lag_fou
     5e-05 	 ind_viv_fin_ult1_lag_thr
     5e-05 	 ind_viv_fin_ult1_lag_one
     5e-05 	 ult_fec_cli_1t
     4e-05 	 indrel
     4e-05 	 ind_cder_fin_ult1_lag_two
     4e-05 	 ind_deco_fin_ult1_lag_fou
     4e-05 	 ind_empleado
     3e-05 	 indrel_lag_fiv
     3e-05 	 ind_deme_fin_ult1_lag_two
     3e-05 	 ind_pres_fin_ult1_lag_thr
     3e-05 	 ind_deme_fin_ult1_lag_one
     3e-05 	 indrel_lag_thr
     3e-05 	 indrel_lag_fou
     3e-05 	 ind_pres_fin_ult1_lag_fou
     3e-05 	 ind_deme_fin_ult1_lag_fiv
     2e-05 	 ind_viv_fin_ult1_lag_fou
     2e-05 	 indrel_lag_two
     2e-05 	 ind_deco_fin_ult1_lag_fiv
     1e-05 	 ind_ctju_fin_ult1_lag_fiv
     1e-05 	 ind_aval_fin_ult1_lag_fiv
     1e-05 	 conyuemp_lag_two
     1e-05 	 ind_pres_fin_ult1_lag_two
     1e-05 	 ind_ahor_fin_ult1_lag_thr
     1e-05 	 indrel_1mes_lag_one
     1e-05 	 ind_cder_fin_ult1_lag_one
     1e-05 	 ind_ahor_fin_ult1_lag_one
     1e-05 	 ind_cder_fin_ult1_lag_fiv
     1e-05 	 ind_ctju_fin_ult1_lag_fou
     1e-05 	 indfall_lag_fou
     1e-05 	 ind_cder_fin_ult1_lag_fou
     1e-05 	 ind_cder_fin_ult1_lag_thr
     1e-05 	 indfall_lag_one
     1e-05 	 ind_viv_fin_ult1_lag_fiv
     1e-05 	 indrel_1mes_lag_two
     1e-05 	 ind_aval_fin_ult1_lag_two
     0.0 	 conyuemp
     0.0 	 indrel_1mes
     0.0 	 indrel_lag_one
     0.0 	 indrel_1mes_lag_thr
     0.0 	 indfall_lag_two
     0.0 	 ind_aval_fin_ult1_lag_one
     0.0 	 indfall_lag_fiv
     0.0 	 indresi
     0.0 	 ind_ahor_fin_ult1_lag_two
     0.0 	 ind_ahor_fin_ult1_lag_fou
     0.0 	 indrel_1mes_lag_fou
     0.0 	 ind_aval_fin_ult1_lag_fou
     0.0 	 indfall
     0.0 	 indfall_lag_thr
     0.0 	 ind_ahor_fin_ult1_lag_fiv
     0.0 	 indrel_1mes_lag_fiv
     0.0 	 ind_nuevo_lag_fiv
     0.0 	 ind_aval_fin_ult1_lag_thr
     0.0 	 pais_residencia

In [26]:
# 주요 변수 시각화
plot_fimp(et_fimp)


피쳐 엔지니어링 (데이터 최적화) [+2]

- 직접 새로운 변수를 추가 혹은 기존 변수를 삭제하여서 최적의 변수세트 생성해보기
- 주의: 훈련 데이터에 수행한 변수 변환은 테스트 데이터에도 동일하게 수행해야함
- 힌트: 금융 상품에 대한 새로운 정보를 넣는 것

In [ ]:
# 입력 : trn, target, tst
# 출력 : new trn, new tst, same target

In [17]:
trn.head()


Out[17]:
age antiguedad canal_entrada cod_prov conyuemp fecha_alta ind_actividad_cliente ind_empleado ind_nuevo indext ... indrel_lag_fiv indrel_1mes_lag_fiv indresi_lag_fiv nomprov_lag_fiv pais_residencia_lag_fiv renta_lag_fiv segmento_lag_fiv sexo_lag_fiv tiprel_1mes_lag_fiv ult_fec_cli_1t_lag_fiv
0 28 34 150 20 2 1012 1 3 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 28 34 150 20 2 1012 1 3 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 37 34 122 20 2 1012 1 3 0 0 ... 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
3 37 34 122 20 2 1012 1 3 0 0 ... 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
4 40 34 122 20 2 1012 1 3 0 0 ... 0.0 0.0 1.0 30.0 36.0 93847.0 1.0 0.0 0.0 -153.0

5 rows × 246 columns


In [18]:
trn["age"] = (trn["age"]/10).astype(int)

In [21]:
print(trn.columns)


Index(['age', 'antiguedad', 'canal_entrada', 'cod_prov', 'conyuemp',
       'fecha_alta', 'ind_actividad_cliente', 'ind_empleado', 'ind_nuevo',
       'indext',
       ...
       'indrel_lag_fiv', 'indrel_1mes_lag_fiv', 'indresi_lag_fiv',
       'nomprov_lag_fiv', 'pais_residencia_lag_fiv', 'renta_lag_fiv',
       'segmento_lag_fiv', 'sexo_lag_fiv', 'tiprel_1mes_lag_fiv',
       'ult_fec_cli_1t_lag_fiv'],
      dtype='object', length=246)

In [22]:
pd.set_option('display.max_columns', 500)

In [29]:
trn.head()


Out[29]:
age antiguedad canal_entrada cod_prov conyuemp fecha_alta ind_actividad_cliente ind_empleado ind_nuevo indext indfall indrel indrel_1mes indresi nomprov pais_residencia renta segmento sexo tiprel_1mes ult_fec_cli_1t age_lag_one antiguedad_lag_one canal_entrada_lag_one cod_prov_lag_one conyuemp_lag_one fecha_alta_lag_one ind_actividad_cliente_lag_one ind_ahor_fin_ult1_lag_one ind_aval_fin_ult1_lag_one ind_cco_fin_ult1_lag_one ind_cder_fin_ult1_lag_one ind_cno_fin_ult1_lag_one ind_ctju_fin_ult1_lag_one ind_ctma_fin_ult1_lag_one ind_ctop_fin_ult1_lag_one ind_ctpp_fin_ult1_lag_one ind_deco_fin_ult1_lag_one ind_dela_fin_ult1_lag_one ind_deme_fin_ult1_lag_one ind_ecue_fin_ult1_lag_one ind_empleado_lag_one ind_fond_fin_ult1_lag_one ind_hip_fin_ult1_lag_one ind_nom_pens_ult1_lag_one ind_nomina_ult1_lag_one ind_nuevo_lag_one ind_plan_fin_ult1_lag_one ind_pres_fin_ult1_lag_one ind_reca_fin_ult1_lag_one ind_recibo_ult1_lag_one ind_tjcr_fin_ult1_lag_one ind_valo_fin_ult1_lag_one ind_viv_fin_ult1_lag_one indext_lag_one indfall_lag_one indrel_lag_one indrel_1mes_lag_one indresi_lag_one nomprov_lag_one pais_residencia_lag_one renta_lag_one segmento_lag_one sexo_lag_one tiprel_1mes_lag_one ult_fec_cli_1t_lag_one age_lag_two antiguedad_lag_two canal_entrada_lag_two cod_prov_lag_two conyuemp_lag_two fecha_alta_lag_two ind_actividad_cliente_lag_two ind_ahor_fin_ult1_lag_two ind_aval_fin_ult1_lag_two ind_cco_fin_ult1_lag_two ind_cder_fin_ult1_lag_two ind_cno_fin_ult1_lag_two ind_ctju_fin_ult1_lag_two ind_ctma_fin_ult1_lag_two ind_ctop_fin_ult1_lag_two ind_ctpp_fin_ult1_lag_two ind_deco_fin_ult1_lag_two ind_dela_fin_ult1_lag_two ind_deme_fin_ult1_lag_two ind_ecue_fin_ult1_lag_two ind_empleado_lag_two ind_fond_fin_ult1_lag_two ind_hip_fin_ult1_lag_two ind_nom_pens_ult1_lag_two ind_nomina_ult1_lag_two ind_nuevo_lag_two ind_plan_fin_ult1_lag_two ind_pres_fin_ult1_lag_two ind_reca_fin_ult1_lag_two ind_recibo_ult1_lag_two ind_tjcr_fin_ult1_lag_two ind_valo_fin_ult1_lag_two ind_viv_fin_ult1_lag_two indext_lag_two indfall_lag_two indrel_lag_two indrel_1mes_lag_two indresi_lag_two nomprov_lag_two pais_residencia_lag_two renta_lag_two segmento_lag_two sexo_lag_two tiprel_1mes_lag_two ult_fec_cli_1t_lag_two age_lag_thr antiguedad_lag_thr canal_entrada_lag_thr cod_prov_lag_thr conyuemp_lag_thr fecha_alta_lag_thr ind_actividad_cliente_lag_thr ind_ahor_fin_ult1_lag_thr ind_aval_fin_ult1_lag_thr ind_cco_fin_ult1_lag_thr ind_cder_fin_ult1_lag_thr ind_cno_fin_ult1_lag_thr ind_ctju_fin_ult1_lag_thr ind_ctma_fin_ult1_lag_thr ind_ctop_fin_ult1_lag_thr ind_ctpp_fin_ult1_lag_thr ind_deco_fin_ult1_lag_thr ind_dela_fin_ult1_lag_thr ind_deme_fin_ult1_lag_thr ind_ecue_fin_ult1_lag_thr ind_empleado_lag_thr ind_fond_fin_ult1_lag_thr ind_hip_fin_ult1_lag_thr ind_nom_pens_ult1_lag_thr ind_nomina_ult1_lag_thr ind_nuevo_lag_thr ind_plan_fin_ult1_lag_thr ind_pres_fin_ult1_lag_thr ind_reca_fin_ult1_lag_thr ind_recibo_ult1_lag_thr ind_tjcr_fin_ult1_lag_thr ind_valo_fin_ult1_lag_thr ind_viv_fin_ult1_lag_thr indext_lag_thr indfall_lag_thr indrel_lag_thr indrel_1mes_lag_thr indresi_lag_thr nomprov_lag_thr pais_residencia_lag_thr renta_lag_thr segmento_lag_thr sexo_lag_thr tiprel_1mes_lag_thr ult_fec_cli_1t_lag_thr age_lag_fou antiguedad_lag_fou canal_entrada_lag_fou cod_prov_lag_fou conyuemp_lag_fou fecha_alta_lag_fou ind_actividad_cliente_lag_fou ind_ahor_fin_ult1_lag_fou ind_aval_fin_ult1_lag_fou ind_cco_fin_ult1_lag_fou ind_cder_fin_ult1_lag_fou ind_cno_fin_ult1_lag_fou ind_ctju_fin_ult1_lag_fou ind_ctma_fin_ult1_lag_fou ind_ctop_fin_ult1_lag_fou ind_ctpp_fin_ult1_lag_fou ind_deco_fin_ult1_lag_fou ind_dela_fin_ult1_lag_fou ind_deme_fin_ult1_lag_fou ind_ecue_fin_ult1_lag_fou ind_empleado_lag_fou ind_fond_fin_ult1_lag_fou ind_hip_fin_ult1_lag_fou ind_nom_pens_ult1_lag_fou ind_nomina_ult1_lag_fou ind_nuevo_lag_fou ind_plan_fin_ult1_lag_fou ind_pres_fin_ult1_lag_fou ind_reca_fin_ult1_lag_fou ind_recibo_ult1_lag_fou ind_tjcr_fin_ult1_lag_fou ind_valo_fin_ult1_lag_fou ind_viv_fin_ult1_lag_fou indext_lag_fou indfall_lag_fou indrel_lag_fou indrel_1mes_lag_fou indresi_lag_fou nomprov_lag_fou pais_residencia_lag_fou renta_lag_fou segmento_lag_fou sexo_lag_fou tiprel_1mes_lag_fou ult_fec_cli_1t_lag_fou age_lag_fiv antiguedad_lag_fiv canal_entrada_lag_fiv cod_prov_lag_fiv conyuemp_lag_fiv fecha_alta_lag_fiv ind_actividad_cliente_lag_fiv ind_ahor_fin_ult1_lag_fiv ind_aval_fin_ult1_lag_fiv ind_cco_fin_ult1_lag_fiv ind_cder_fin_ult1_lag_fiv ind_cno_fin_ult1_lag_fiv ind_ctju_fin_ult1_lag_fiv ind_ctma_fin_ult1_lag_fiv ind_ctop_fin_ult1_lag_fiv ind_ctpp_fin_ult1_lag_fiv ind_deco_fin_ult1_lag_fiv ind_dela_fin_ult1_lag_fiv ind_deme_fin_ult1_lag_fiv ind_ecue_fin_ult1_lag_fiv ind_empleado_lag_fiv ind_fond_fin_ult1_lag_fiv ind_hip_fin_ult1_lag_fiv ind_nom_pens_ult1_lag_fiv ind_nomina_ult1_lag_fiv ind_nuevo_lag_fiv ind_plan_fin_ult1_lag_fiv ind_pres_fin_ult1_lag_fiv ind_reca_fin_ult1_lag_fiv ind_recibo_ult1_lag_fiv ind_tjcr_fin_ult1_lag_fiv ind_valo_fin_ult1_lag_fiv ind_viv_fin_ult1_lag_fiv indext_lag_fiv indfall_lag_fiv indrel_lag_fiv indrel_1mes_lag_fiv indresi_lag_fiv nomprov_lag_fiv pais_residencia_lag_fiv renta_lag_fiv segmento_lag_fiv sexo_lag_fiv tiprel_1mes_lag_fiv ult_fec_cli_1t_lag_fiv
0 2 34 150 20 2 1012 1 3 0 0 0 0 0 1 30 36 189517 2 1 1 -2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 2 34 150 20 2 1012 1 3 0 0 0 0 0 1 30 36 189517 2 1 1 -2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 3 34 122 20 2 1012 1 3 0 0 0 0 0 1 30 36 107894 1 1 0 -2 37.0 34.0 122.0 20.0 2.0 981.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -33.0 37.0 34.0 122.0 20.0 2.0 951.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -63.0 37.0 34.0 122.0 20.0 2.0 920.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -94.0 37.0 34.0 122.0 20.0 2.0 892.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -122.0 37.0 34.0 122.0 20.0 2.0 861.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
3 3 34 122 20 2 1012 1 3 0 0 0 0 0 1 30 36 107894 1 1 0 -2 37.0 34.0 122.0 20.0 2.0 981.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -33.0 37.0 34.0 122.0 20.0 2.0 951.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -63.0 37.0 34.0 122.0 20.0 2.0 920.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -94.0 37.0 34.0 122.0 20.0 2.0 892.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -122.0 37.0 34.0 122.0 20.0 2.0 861.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
4 4 34 122 20 2 1012 1 3 0 0 0 0 0 1 30 36 93847 1 0 0 -2 40.0 34.0 122.0 20.0 2.0 981.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 93847.0 1.0 0.0 0.0 -33.0 40.0 34.0 122.0 20.0 2.0 951.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 93847.0 1.0 0.0 0.0 -63.0 40.0 34.0 122.0 20.0 2.0 920.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 93847.0 1.0 0.0 0.0 -94.0 40.0 34.0 122.0 20.0 2.0 892.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 93847.0 1.0 0.0 0.0 -122.0 40.0 34.0 122.0 20.0 2.0 861.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 30.0 36.0 93847.0 1.0 0.0 0.0 -153.0

In [30]:
trn = trn.drop(['sexo_lag_one', 'sexo_lag_two','sexo_lag_thr', 'sexo_lag_fou', 'sexo_lag_fiv'], 1)

In [34]:
len(np.unique((trn.columns)))


Out[34]:
241

In [35]:
trn = trn.drop(['age_lag_one', 'age_lag_two','age_lag_thr', 'age_lag_fou', 'age_lag_fiv'], 1)

In [36]:
len(np.unique((trn.columns)))


Out[36]:
236

In [ ]:
# trn.head() #cod_prov, fecha, alta, in_empleado, ind_nuevo, indext, indfall, 

trn = trn.drop(['cod_prov_lag_one', 'cod_prov_lag_two','cod_prov_lag_thr', 'cod_prov_lag_fou', 'cod_prov_lag_fiv'], 1)

trn = trn.drop(['fecha_alta_lag_one', 'fecha_alta_lag_two','fecha_alta_lag_thr', 'fecha_alta_lag_fou', 'fecha_alta_lag_fiv'], 1)

trn = trn.drop(['ind_empleado_lag_one', 'ind_empleado_lag_two','ind_empleado_lag_thr', 'ind_empleado_lag_fou', 'ind_empleado_lag_fiv'], 1)

trn = trn.drop(['ind_nuevo_lag_one', 'ind_nuevo_lag_two','ind_nuevo_lag_thr', 'ind_nuevo_lag_fou', 'ind_nuevo_lag_fiv'], 1)

trn = trn.drop(['indext_lag_one', 'indext_lag_two','indext_lag_thr', 'indext_lag_fou', 'indext_lag_fiv'], 1)

trn = trn.drop(['indfall_lag_one', 'indfall_lag_two','indfall_lag_thr', 'indfall_lag_fou', 'indfall_lag_fiv'], 1)

In [58]:
# tst.head() #cod_prov, fecha, alta, in_empleado, ind_nuevo, indext, indfall, 
tst = tst.drop(['sexo_lag_one', 'sexo_lag_two','sexo_lag_thr', 'sexo_lag_fou', 'sexo_lag_fiv'], 1)

tst = tst.drop(['age_lag_one', 'age_lag_two','age_lag_thr', 'age_lag_fou', 'age_lag_fiv'], 1)

tst = tst.drop(['cod_prov_lag_one', 'cod_prov_lag_two','cod_prov_lag_thr', 'cod_prov_lag_fou', 'cod_prov_lag_fiv'], 1)

tst = tst.drop(['fecha_alta_lag_one', 'fecha_alta_lag_two','fecha_alta_lag_thr', 'fecha_alta_lag_fou', 'fecha_alta_lag_fiv'], 1)

tst = tst.drop(['ind_empleado_lag_one', 'ind_empleado_lag_two','ind_empleado_lag_thr', 'ind_empleado_lag_fou', 'ind_empleado_lag_fiv'], 1)

tst = tst.drop(['ind_nuevo_lag_one', 'ind_nuevo_lag_two','ind_nuevo_lag_thr', 'ind_nuevo_lag_fou', 'ind_nuevo_lag_fiv'], 1)

tst = tst.drop(['indext_lag_one', 'indext_lag_two','indext_lag_thr', 'indext_lag_fou', 'indext_lag_fiv'], 1)

tst = tst.drop(['indfall_lag_one', 'indfall_lag_two','indfall_lag_thr', 'indfall_lag_fou', 'indfall_lag_fiv'], 1)

In [ ]:
trn = trn.drop(['fecha_alta_lag_one', 'fecha_alta_lag_two','fecha_alta_lag_thr', 'fecha_alta_lag_fou', 'fecha_alta_lag_fiv'], 1)

In [ ]:
trn = trn.drop(['ind_empleado_lag_one', 'ind_empleado_lag_two','ind_empleado_lag_thr', 'ind_empleado_lag_fou', 'ind_empleado_lag_fiv'], 1)

In [ ]:
trn = trn.drop(['ind_nuevo_lag_one', 'ind_nuevo_lag_two','ind_nuevo_lag_thr', 'ind_nuevo_lag_fou', 'ind_nuevo_lag_fiv'], 1)

In [ ]:
trn = trn.drop(['indext_lag_one', 'indext_lag_two','indext_lag_thr', 'indext_lag_fou', 'indext_lag_fiv'], 1)

In [ ]:
trn = trn.drop(['indfall_lag_one', 'indfall_lag_two','indfall_lag_thr', 'indfall_lag_fou', 'indfall_lag_fiv'], 1)

In [49]:
len(np.unique((trn.columns)))


Out[49]:
206

강사님 Tip

타겟별 누적 합

금융상품의 보유 정보를 좀 더 녹여보고싶음!!


In [71]:
cols = ['ind_ahor_fin_ult1', 'ind_aval_fin_ult1', 'ind_cco_fin_ult1',
        'ind_cder_fin_ult1', 'ind_cno_fin_ult1',  'ind_ctju_fin_ult1',
        'ind_ctma_fin_ult1', 'ind_ctop_fin_ult1', 'ind_ctpp_fin_ult1',
        'ind_deco_fin_ult1', 'ind_deme_fin_ult1', 'ind_dela_fin_ult1',
        'ind_ecue_fin_ult1', 'ind_fond_fin_ult1', 'ind_hip_fin_ult1',
        'ind_plan_fin_ult1', 'ind_pres_fin_ult1', 'ind_reca_fin_ult1',
        'ind_tjcr_fin_ult1', 'ind_valo_fin_ult1', 'ind_viv_fin_ult1',
        'ind_nomina_ult1',   'ind_nom_pens_ult1', 'ind_recibo_ult1']

print(trn.shape, tst.shape)

# 타겟별 누적 합
lags = ['_lag_one','_lag_two','_lag_thr','_lag_fou','_lag_fiv']
for col in cols:
    trn[col+'_sum'] = trn[[col+lag for lag in lags]].sum(axis=1)
    tst[col+'_sum'] = tst[[col+lag for lag in lags]].sum(axis=1)
    
# 월별 누적 합
for lag in lags:
    trn['sum'+lag] = trn[[col+lag for col in cols]].sum(axis=1)
    tst['sum'+lag] = tst[[col+lag for col in cols]].sum(axis=1)
    
print(trn.shape, tst.shape)


(45595, 206) (929615, 206)
(45595, 235) (929615, 235)

In [ ]:


In [ ]:

매개변수 최적화 (모델 최적화) [+1]

- 사용하는 모델의 매개변수를 직접 정의하여 최적의 매개변수 찾아내기
- 참고: scikit learn 홈페이지를 통해 모델별 매개변수 확인 가능
- 힌트: trn/vld logloss 를 비교하여, 모델의 복잡도를 조정하기

In [ ]:
# 입력 : none

# 출력: model instance

In [74]:
for i in range(3, 13):
    print("i의 값은 " + str(i))
    st = time.time()

    rf_model = RandomForestClassifier(max_depth=i, n_jobs=-1, random_state=777)
    fit_and_eval(trn, target, rf_model)
    print("*" * 20)


i의 값은 3
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.6917640820393318
#          Raw  : [1.6873325878812817, 1.6760543222457542, 1.7119053359909591]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.6930915826137196
#          Raw  : [1.6871435665894301, 1.679384087159957, 1.7127470940917713]
==================================================
3 secs
********************
i의 값은 4
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.607264861981496
#          Raw  : [1.5990760373060697, 1.6039240507226946, 1.6187944979157243]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.6116065565306528
#          Raw  : [1.6012177729511254, 1.6096298032679304, 1.6239720933729023]
==================================================
2 secs
********************
i의 값은 5
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.4598439411306032
#          Raw  : [1.4818101715680239, 1.4415990173945326, 1.4561226344292537]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.4691840003529801
#          Raw  : [1.4860200084827035, 1.451500826298241, 1.4700311662779963]
==================================================
3 secs
********************
i의 값은 6
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3672655894088352
#          Raw  : [1.3550333263478045, 1.3805048442669128, 1.3662585976117885]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.383793796138132
#          Raw  : [1.3714761160440958, 1.3986455395006263, 1.3812597328696736]
==================================================
3 secs
********************
i의 값은 7
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2973388729307247
#          Raw  : [1.2904399241330868, 1.2926939961562194, 1.3088826985028679]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3232882705155689
#          Raw  : [1.3165972703157922, 1.3205823395917586, 1.3326852016391557]
==================================================
3 secs
********************
i의 값은 8
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.230295234926633
#          Raw  : [1.2241754971627086, 1.2221933238977483, 1.2445168837194425]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2779515538819621
#          Raw  : [1.2687681858803603, 1.2712548421180669, 1.2938316336474587]
==================================================
3 secs
********************
i의 값은 9
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.1702237472111416
#          Raw  : [1.1686054019891523, 1.1718969317894392, 1.1701689078548339]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2470973617527712
#          Raw  : [1.2352856305514412, 1.2577189919903666, 1.2482874627165055]
==================================================
3 secs
********************
i의 값은 10
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.117607584327823
#          Raw  : [1.1263938879972701, 1.1169023060065728, 1.1095265589796262]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.230658053293503
#          Raw  : [1.2336084341896874, 1.236927010419268, 1.2214387152715538]
==================================================
3 secs
********************
i의 값은 11
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.0606618580115434
#          Raw  : [1.0673124400655407, 1.0680346205484934, 1.0466385134205967]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2337135440545453
#          Raw  : [1.220225966037223, 1.2650399784737429, 1.2158746876526698]
==================================================
3 secs
********************
i의 값은 12
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.0007647753086395
#          Raw  : [0.99251880370967727, 1.0077843638094812, 1.0019911584067605]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2220616883875692
#          Raw  : [1.2052702442573726, 1.2385375272864223, 1.2223772936189128]
==================================================
3 secs
********************

In [55]:
for i in range(3, 15):
    print("i의 값은 " + str(i))
    st = time.time()

    rf_model = RandomForestClassifier(max_depth=9, n_estimators=i, n_jobs=-1, random_state=777)
    fit_and_eval(trn, target, rf_model)
    print("*" * 20)


i의 값은 3
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3113241391287322
#          Raw  : [1.2766681687173667, 1.3272264616138534, 1.3300777870549769]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.4155216099688375
#          Raw  : [1.3691344894255446, 1.4471506442328705, 1.4302796962480979]
==================================================
2 secs
********************
i의 값은 4
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2838478632670112
#          Raw  : [1.2578444140391261, 1.3057244105246346, 1.2879747652372733]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3708457752250516
#          Raw  : [1.3305282955162128, 1.4117094839183635, 1.3702995462405787]
==================================================
2 secs
********************
i의 값은 5
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2763589870052476
#          Raw  : [1.245478719292672, 1.3127448842453837, 1.2708533574776875]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.352926859290949
#          Raw  : [1.3138329847089349, 1.3995870792163505, 1.3453605139475617]
==================================================
2 secs
********************
i의 값은 6
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2757741792335626
#          Raw  : [1.2514110441048278, 1.3031143699100025, 1.2727971236858577]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3468267412069233
#          Raw  : [1.3140254919757857, 1.3815877668850893, 1.344866964759895]
==================================================
2 secs
********************
i의 값은 7
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2647931305520494
#          Raw  : [1.2464972255174758, 1.2933711976260582, 1.2545109685126148]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3360046155874112
#          Raw  : [1.3108226357037192, 1.3674156567921769, 1.3297755542663379]
==================================================
2 secs
********************
i의 값은 8
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2522440223097047
#          Raw  : [1.2282452892828892, 1.2754027499400431, 1.2530840277061819]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3227878637008508
#          Raw  : [1.2935875759795354, 1.3539245806388009, 1.3208514344842159]
==================================================
3 secs
********************
i의 값은 9
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.244790074667987
#          Raw  : [1.2219386662665062, 1.2630027474135781, 1.2494288103238769]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3130669555550742
#          Raw  : [1.2808752969646535, 1.3414627960264083, 1.3168627736741607]
==================================================
3 secs
********************
i의 값은 10
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2379894961043505
#          Raw  : [1.2197392363190847, 1.2501645846487242, 1.2440646673452431]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3075112329145573
#          Raw  : [1.2806154182584841, 1.328716847715194, 1.3132014327699939]
==================================================
3 secs
********************
i의 값은 11
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.237236987710796
#          Raw  : [1.222659067141636, 1.2438353254665768, 1.2452165705241753]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3065624168413443
#          Raw  : [1.2840539685192327, 1.3213814547685876, 1.3142518272362127]
==================================================
3 secs
********************
i의 값은 12
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2370022844575093
#          Raw  : [1.2211253876687824, 1.2408081982271832, 1.2490732674765621]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3061039424552225
#          Raw  : [1.2835349672727667, 1.3188901690164181, 1.3158866910764833]
==================================================
3 secs
********************
i의 값은 13
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2343620860718711
#          Raw  : [1.2209561933708775, 1.2400689778805687, 1.2420610869641673]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3005657863858877
#          Raw  : [1.282326379047807, 1.311123471483117, 1.3082475086267393]
==================================================
3 secs
********************
i의 값은 14
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2325767418519622
#          Raw  : [1.2187288166957797, 1.2415342570257937, 1.2374671518343132]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2992265339759328
#          Raw  : [1.2803575368934215, 1.313092085419012, 1.3042299796153649]
==================================================
3 secs
********************

In [56]:
for i in range(0, 8):
    print("i의 값은 " + str(i))
    st = time.time()

    rf_model = RandomForestClassifier(max_depth=9, n_estimators=14, verbose = i, n_jobs=-1, random_state=777)
    fit_and_eval(trn, target, rf_model)
    print("*" * 20)


i의 값은 0
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2325767418519622
#          Raw  : [1.2187288166957797, 1.2415342570257937, 1.2374671518343132]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2992265339759328
#          Raw  : [1.2803575368934215, 1.313092085419012, 1.3042299796153649]
==================================================
3 secs
********************
i의 값은 1
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.3s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2325767418519622
#          Raw  : [1.2187288166957797, 1.2415342570257937, 1.2374671518343132]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2992265339759328
#          Raw  : [1.2803575368934215, 1.313092085419012, 1.3042299796153649]
==================================================
3 secs
********************
i의 값은 2
building tree 1 of 14
building tree 2 of 14
building tree 3 of 14
building tree 4 of 14
building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2325767418519622
#          Raw  : [1.2187288166957797, 1.2415342570257937, 1.2374671518343132]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2992265339759328
#          Raw  : [1.2803575368934215, 1.313092085419012, 1.3042299796153649]
==================================================
3 secs
********************
i의 값은 3
building tree 1 of 14
building tree 2 of 14
building tree 3 of 14
building tree 4 of 14
building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  12 out of  14 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  12 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  12 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  12 out of  14 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  12 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  12 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  12 out of  14 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  12 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  12 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2325767418519622
#          Raw  : [1.2187288166957797, 1.2415342570257937, 1.2374671518343132]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2992265339759328
#          Raw  : [1.2803575368934215, 1.313092085419012, 1.3042299796153649]
==================================================
3 secs
********************
i의 값은 4
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  11 out of  14 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  11 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  11 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  11 out of  14 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  11 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  11 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14

building tree 3 of 14
building tree 4 of 14
building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  11 out of  14 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  11 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  11 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2325767418519622
#          Raw  : [1.2187288166957797, 1.2415342570257937, 1.2374671518343132]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2992265339759328
#          Raw  : [1.2803575368934215, 1.313092085419012, 1.3042299796153649]
==================================================
3 secs
********************
i의 값은 5
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  10 out of  14 | elapsed:    0.3s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  10 out of  14 | elapsed:    0.3s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  10 out of  14 | elapsed:    0.3s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2325767418519622
#          Raw  : [1.2187288166957797, 1.2415342570257937, 1.2374671518343132]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2992265339759328
#          Raw  : [1.2803575368934215, 1.313092085419012, 1.3042299796153649]
==================================================
3 secs
********************
i의 값은 6
building tree 1 of 14
building tree 2 of 14
building tree 3 of 14
building tree 4 of 14
building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  14 | elapsed:    0.3s remaining:    0.0s
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  14 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.3s finished
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  10 out of  14 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2325767418519622
#          Raw  : [1.2187288166957797, 1.2415342570257937, 1.2374671518343132]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2992265339759328
#          Raw  : [1.2803575368934215, 1.313092085419012, 1.3042299796153649]
==================================================
3 secs
********************
i의 값은 7
building tree 1 of 14
building tree 2 of 14
building tree 3 of 14
building tree 4 of 14
building tree 5 of 14building tree 6 of 14

building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  10 out of  14 | elapsed:    0.3s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  10 out of  14 | elapsed:    0.3s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
building tree 1 of 14building tree 2 of 14
building tree 3 of 14
building tree 4 of 14

building tree 5 of 14
building tree 6 of 14
building tree 7 of 14
building tree 8 of 14
building tree 9 of 14
building tree 10 of 14
building tree 11 of 14
building tree 12 of 14
building tree 13 of 14
building tree 14 of 14
[Parallel(n_jobs=-1)]: Done  10 out of  14 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  14 out of  14 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Done  10 out of  14 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=4)]: Done  14 out of  14 | elapsed:    0.0s finished
==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2325767418519622
#          Raw  : [1.2187288166957797, 1.2415342570257937, 1.2374671518343132]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.2992265339759328
#          Raw  : [1.2803575368934215, 1.313092085419012, 1.3042299796153649]
==================================================
3 secs
********************

In [ ]:


In [83]:
st = time.time()

et_model = ExtraTreesClassifier(max_depth=7, n_jobs=-1, random_state=777)
fit_and_eval(trn, target, et_model)


==================================================
TRAIN EVAL
--------------------------------------------------
# log loss
#          Mean : 1.350351651905559
#          Raw  : [1.344456508193496, 1.382822898884797, 1.3237755486383842]
==================================================
VALID EVAL
--------------------------------------------------
# log loss
#          Mean : 1.3758350517852571
#          Raw  : [1.3671488987996656, 1.4126798157501401, 1.3476764408059652]
==================================================
3 secs

In [ ]:

캐글에 직접 결과물 제출하기

- MAP@7 평가척도를 기반 (https://www.kaggle.com/c/santander-product-recommendation/details/evaluation)
- 유저당 상위 7개의 제품을 추천해야함

In [14]:
# 최종 모델 정의하기
model = RandomForestClassifier(max_depth=20, n_jobs=-1, random_state=777)

In [75]:
# 최종 모델 정의하기
model = RandomForestClassifier(max_depth=9, n_jobs=-1, random_state=777)

In [67]:
model = ExtraTreesClassifier(max_depth=7, n_jobs=-1, random_state=777)

In [76]:
from datetime import datetime
import os

print('='*50)
print('# Test shape : {}'.format(tst.shape))

model.fit(trn,target)

preds = model.predict_proba(tst)
preds = np.fliplr(np.argsort(preds, axis=1))


==================================================
# Test shape : (929615, 235)

In [77]:
cols = ['ind_ahor_fin_ult1', 'ind_aval_fin_ult1', 'ind_cco_fin_ult1',
        'ind_cder_fin_ult1', 'ind_cno_fin_ult1',  'ind_ctju_fin_ult1',
        'ind_ctma_fin_ult1', 'ind_ctop_fin_ult1', 'ind_ctpp_fin_ult1',
        'ind_deco_fin_ult1', 'ind_deme_fin_ult1', 'ind_dela_fin_ult1',
        'ind_ecue_fin_ult1', 'ind_fond_fin_ult1', 'ind_hip_fin_ult1',
        'ind_plan_fin_ult1', 'ind_pres_fin_ult1', 'ind_reca_fin_ult1',
        'ind_tjcr_fin_ult1', 'ind_valo_fin_ult1', 'ind_viv_fin_ult1',
        'ind_nomina_ult1',   'ind_nom_pens_ult1', 'ind_recibo_ult1']
target_cols = [cols[i] for i, col in enumerate(cols) if i in rem_targets]

In [78]:
final_preds = []
for pred in preds:
    top_products = []
    for i, product in enumerate(pred):
        top_products.append(target_cols[product])
        if i == 6:
            break
    final_preds.append(' '.join(top_products))

out_df = pd.DataFrame({'ncodpers':test_id, 'added_products':final_preds})
file_name = datetime.now().strftime("result_%Y%m%d%H%M%S") + '.csv'
out_df.to_csv(os.path.join('../output',file_name), index=False)

In [ ]:


In [ ]:


In [ ]:

나만의 머신러닝 파이프라인 흐름도(Flow Chart) 기록하기

  • 원천 데이터

    • .
  • 전처리

    • .
  • 피쳐 엔지니어링 이전 데이터 dimension

    • .
  • 피쳐 엔지니어링

    • .
  • 피쳐 엔지니어링 이후 데이터 dimension

    • .
  • 모델 튜닝

    • .
  • 검증 결과

    • .
  • 실제 결과

    • .

예시

  • 전처리
    • 결측값을 .fillna 함수를 통해 0으로 대체. (기존 데이터에 0이 존재할 경우 -1로 대체)
  • 피쳐 엔지니어링 이전 데이터 dimension:
    • trn : (45619, 246)
    • target : (45619, 1) [18 classes]
    • tst : (929615, 246)
  • 피쳐 엔지니어링
    • age_log : log(age + 1)
    • ind..._lag_one : 5월 사용자별 금융상품 보유현황
    • ind..._lag_two : 4월 사용자별 금융상품 보유현황
    • ind..._lag_thr : 3월 사용자별 금융상품 보유현황
  • 피쳐 엔지니어링 이후 데이터 dimension:
    • trn : (45619, 250)
    • target : (45619, 1) [18 classes]
    • tst : (929615, 250)
  • 모델 튜닝
    • RandomForest : max_depth = 20 로 복잡도 조정
  • 검증 결과
    • trn logloss : 1.18
    • vld logloss : 1.28
  • 실제 결과
    • Public Leaderboard : 0.025984

In [ ]:

Appendix

- RandomForest vs ExtraTrees 의 차이란?
    - P. Geurts, D. Ernst., and L. Wehenkel, “Extremely randomized trees”, Machine Learning, 63(1), 3-42, 2006
    - 1) ET의 경우, 변수 샘플링을 boostrap 샘플이 아닌 전체 데이터에서 취한다.
    - 2) ET의 경우, 샘플내 분포에 상관없이 완전한 임의 샘플링으로 데이터 샘플을 취한다.

In [ ]:


In [ ]:


In [33]:
print(trn.columns)


Index(['age', 'antiguedad', 'canal_entrada', 'cod_prov', 'conyuemp',
       'fecha_alta', 'ind_actividad_cliente', 'ind_empleado', 'ind_nuevo',
       'indext',
       ...
       'indrel_lag_fiv', 'indrel_1mes_lag_fiv', 'indresi_lag_fiv',
       'nomprov_lag_fiv', 'pais_residencia_lag_fiv', 'renta_lag_fiv',
       'segmento_lag_fiv', 'sexo_lag_fiv', 'tiprel_1mes_lag_fiv',
       'ult_fec_cli_1t_lag_fiv'],
      dtype='object', length=246)

In [34]:
trn.head()


Out[34]:
age antiguedad canal_entrada cod_prov conyuemp fecha_alta ind_actividad_cliente ind_empleado ind_nuevo indext ... indrel_lag_fiv indrel_1mes_lag_fiv indresi_lag_fiv nomprov_lag_fiv pais_residencia_lag_fiv renta_lag_fiv segmento_lag_fiv sexo_lag_fiv tiprel_1mes_lag_fiv ult_fec_cli_1t_lag_fiv
0 28 34 150 20 2 1012 1 3 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 28 34 150 20 2 1012 1 3 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 37 34 122 20 2 1012 1 3 0 0 ... 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
3 37 34 122 20 2 1012 1 3 0 0 ... 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
4 40 34 122 20 2 1012 1 3 0 0 ... 0.0 0.0 1.0 30.0 36.0 93847.0 1.0 0.0 0.0 -153.0

5 rows × 246 columns


In [38]:
trn.ix[:5,23:]


Out[38]:
canal_entrada_lag_one cod_prov_lag_one conyuemp_lag_one fecha_alta_lag_one ind_actividad_cliente_lag_one ind_ahor_fin_ult1_lag_one ind_aval_fin_ult1_lag_one ind_cco_fin_ult1_lag_one ind_cder_fin_ult1_lag_one ind_cno_fin_ult1_lag_one ... indrel_lag_fiv indrel_1mes_lag_fiv indresi_lag_fiv nomprov_lag_fiv pais_residencia_lag_fiv renta_lag_fiv segmento_lag_fiv sexo_lag_fiv tiprel_1mes_lag_fiv ult_fec_cli_1t_lag_fiv
0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 122.0 20.0 2.0 981.0 1.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
3 122.0 20.0 2.0 981.0 1.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 1.0 30.0 36.0 107894.0 1.0 1.0 0.0 -153.0
4 122.0 20.0 2.0 981.0 1.0 0.0 0.0 1.0 0.0 0.0 ... 0.0 0.0 1.0 30.0 36.0 93847.0 1.0 0.0 0.0 -153.0
5 122.0 21.0 2.0 981.0 0.0 0.0 0.0 1.0 0.0 0.0 ... 0.0 0.0 1.0 31.0 36.0 54195.0 1.0 1.0 1.0 -153.0

6 rows × 223 columns


In [ ]:

xgboost

num_round : 최대 500

early_stop : 일찍 끝나는 시기

gbtree / gblinear 트리 / 선형

max_depth : 높을수록 복잡

subsample : 새 트리를 만들 때 전체 데이터의 ~%를 던져줄 것인가

dmatrix로 변해야 함

watch_list -> 봐야되는 값들

fit대신 train이란 용어를 씀

이터레이션 평균값에 0.9를 나눠줌(90%의 데이터를 사용한 값이니까!! 나중엔 100%를 훈련시킴)


In [ ]: